diff options
author | Jocelyn Turcotte <jocelyn.turcotte@digia.com> | 2014-08-08 14:30:41 +0200 |
---|---|---|
committer | Jocelyn Turcotte <jocelyn.turcotte@digia.com> | 2014-08-12 13:49:54 +0200 |
commit | ab0a50979b9eb4dfa3320eff7e187e41efedf7a9 (patch) | |
tree | 498dfb8a97ff3361a9f7486863a52bb4e26bb898 /chromium/third_party/WebKit/Source/core/html/parser | |
parent | 4ce69f7403811819800e7c5ae1318b2647e778d1 (diff) |
Update Chromium to beta version 37.0.2062.68
Change-Id: I188e3b5aff1bec75566014291b654eb19f5bc8ca
Reviewed-by: Andras Becsi <andras.becsi@digia.com>
Diffstat (limited to 'chromium/third_party/WebKit/Source/core/html/parser')
55 files changed, 2482 insertions, 1051 deletions
diff --git a/chromium/third_party/WebKit/Source/core/html/parser/AtomicHTMLToken.h b/chromium/third_party/WebKit/Source/core/html/parser/AtomicHTMLToken.h index 63e8c0c86dc..7254ab88480 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/AtomicHTMLToken.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/AtomicHTMLToken.h @@ -26,7 +26,7 @@ #ifndef AtomicHTMLToken_h #define AtomicHTMLToken_h -#include "HTMLElementLookupTrie.h" +#include "core/HTMLElementLookupTrie.h" #include "core/dom/Attribute.h" #include "core/html/parser/CompactHTMLToken.h" #include "core/html/parser/HTMLToken.h" @@ -150,7 +150,7 @@ public: ASSERT_NOT_REACHED(); break; case HTMLToken::DOCTYPE: - m_name = token.data(); + m_name = AtomicString(token.data()); m_doctypeData = adoptPtr(new DoctypeData()); m_doctypeData->m_hasPublicIdentifier = true; append(m_doctypeData->m_publicIdentifier, token.publicIdentifier()); @@ -163,15 +163,15 @@ public: case HTMLToken::StartTag: m_attributes.reserveInitialCapacity(token.attributes().size()); for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) { - QualifiedName name(nullAtom, it->name, nullAtom); + QualifiedName name(nullAtom, AtomicString(it->name), nullAtom); // FIXME: This is N^2 for the number of attributes. if (!findAttributeInVector(m_attributes, name)) - m_attributes.append(Attribute(name, it->value)); + m_attributes.append(Attribute(name, AtomicString(it->value))); } // Fall through! case HTMLToken::EndTag: m_selfClosing = token.selfClosing(); - m_name = token.data(); + m_name = AtomicString(token.data()); break; case HTMLToken::Character: case HTMLToken::Comment: @@ -233,9 +233,7 @@ inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList if (attribute.name.isEmpty()) continue; - // FIXME: We should be able to add the following ASSERT once we fix - // https://bugs.webkit.org/show_bug.cgi?id=62971 - // ASSERT(attribute.nameRange.start); + ASSERT(attribute.nameRange.start); ASSERT(attribute.nameRange.end); ASSERT(attribute.valueRange.start); ASSERT(attribute.valueRange.end); diff --git a/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.cpp index b91dafd40b9..66a62b8d1e7 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.cpp @@ -27,7 +27,7 @@ #include "core/html/parser/BackgroundHTMLParser.h" #include "core/html/parser/HTMLDocumentParser.h" -#include "core/html/parser/HTMLParserThread.h" +#include "core/html/parser/TextResourceDecoder.h" #include "core/html/parser/XSSAuditor.h" #include "wtf/MainThread.h" #include "wtf/text/TextPosition.h" @@ -76,6 +76,12 @@ static void checkThatXSSInfosAreSafeToSendToAnotherThread(const XSSInfoStream& i #endif +void BackgroundHTMLParser::start(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) +{ + new BackgroundHTMLParser(reference, config); + // Caller must free by calling stop(). +} + BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) : m_weakFactory(reference, this) , m_token(adoptPtr(new HTMLToken)) @@ -86,16 +92,62 @@ BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHT , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) , m_xssAuditor(config->xssAuditor.release()) , m_preloadScanner(config->preloadScanner.release()) + , m_decoder(config->decoder.release()) +{ +} + +BackgroundHTMLParser::~BackgroundHTMLParser() +{ +} + +void BackgroundHTMLParser::appendRawBytesFromParserThread(const char* data, int dataLength) { + ASSERT(m_decoder); + updateDocument(m_decoder->decode(data, dataLength)); } -void BackgroundHTMLParser::append(const String& input) +void BackgroundHTMLParser::appendRawBytesFromMainThread(PassOwnPtr<Vector<char> > buffer) +{ + ASSERT(m_decoder); + updateDocument(m_decoder->decode(buffer->data(), buffer->size())); +} + +void BackgroundHTMLParser::appendDecodedBytes(const String& input) { ASSERT(!m_input.current().isClosed()); m_input.append(input); pumpTokenizer(); } +void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder) +{ + ASSERT(decoder); + m_decoder = decoder; +} + +void BackgroundHTMLParser::flush() +{ + ASSERT(m_decoder); + updateDocument(m_decoder->flush()); +} + +void BackgroundHTMLParser::updateDocument(const String& decodedData) +{ + DocumentEncodingData encodingData(*m_decoder.get()); + + if (encodingData != m_lastSeenEncodingData) { + m_lastSeenEncodingData = encodingData; + + m_xssAuditor->setEncoding(encodingData.encoding()); + callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, m_parser, encodingData)); + } + + if (decodedData.isEmpty()) + return; + + appendDecodedBytes(decodedData); +} + void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint) { m_parser = checkpoint->parser; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.h b/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.h index b9b5a511c49..8d315a8f23e 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/BackgroundHTMLParser.h @@ -26,14 +26,14 @@ #ifndef BackgroundHTMLParser_h #define BackgroundHTMLParser_h +#include "core/dom/DocumentEncodingData.h" #include "core/html/parser/BackgroundHTMLInputStream.h" #include "core/html/parser/CompactHTMLToken.h" #include "core/html/parser/HTMLParserOptions.h" #include "core/html/parser/HTMLPreloadScanner.h" #include "core/html/parser/HTMLSourceTracker.h" -#include "core/html/parser/HTMLToken.h" -#include "core/html/parser/HTMLTokenizer.h" #include "core/html/parser/HTMLTreeBuilderSimulator.h" +#include "core/html/parser/TextResourceDecoder.h" #include "core/html/parser/XSSAuditorDelegate.h" #include "wtf/PassOwnPtr.h" #include "wtf/WeakPtr.h" @@ -41,6 +41,7 @@ namespace WebCore { class HTMLDocumentParser; +class SharedBuffer; class XSSAuditor; class BackgroundHTMLParser { @@ -51,13 +52,10 @@ public: WeakPtr<HTMLDocumentParser> parser; OwnPtr<XSSAuditor> xssAuditor; OwnPtr<TokenPreloadScanner> preloadScanner; + OwnPtr<TextResourceDecoder> decoder; }; - static void create(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) - { - new BackgroundHTMLParser(reference, config); - // Caller must free by calling stop(). - } + static void start(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>); struct Checkpoint { WeakPtr<HTMLDocumentParser> parser; @@ -69,7 +67,11 @@ public: String unparsedInput; }; - void append(const String&); + void appendRawBytesFromParserThread(const char* data, int dataLength); + + void appendRawBytesFromMainThread(PassOwnPtr<Vector<char> >); + void setDecoder(PassOwnPtr<TextResourceDecoder>); + void flush(); void resumeFrom(PassOwnPtr<Checkpoint>); void startedChunkWithCheckpoint(HTMLInputCheckpoint); void finish(); @@ -79,10 +81,13 @@ public: private: BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>); + ~BackgroundHTMLParser(); + void appendDecodedBytes(const String&); void markEndOfFile(); void pumpTokenizer(); void sendTokensToMainThread(); + void updateDocument(const String& decodedData); WeakPtrFactory<BackgroundHTMLParser> m_weakFactory; BackgroundHTMLInputStream m_input; @@ -99,6 +104,8 @@ private: OwnPtr<XSSAuditor> m_xssAuditor; OwnPtr<TokenPreloadScanner> m_preloadScanner; + OwnPtr<TextResourceDecoder> m_decoder; + DocumentEncodingData m_lastSeenEncodingData; }; } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.cpp b/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.cpp index b1eea275aaa..042c52a86c8 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.cpp @@ -28,7 +28,7 @@ #include "config.h" #include "core/html/parser/CSSPreloadScanner.h" -#include "FetchInitiatorTypeNames.h" +#include "core/FetchInitiatorTypeNames.h" #include "core/html/parser/HTMLParserIdioms.h" #include "platform/text/SegmentedString.h" diff --git a/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.h b/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.h index 7161a27434e..8c328997553 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/CSSPreloadScanner.h @@ -33,7 +33,6 @@ namespace WebCore { -class HTMLIdentifier; class SegmentedString; class CSSPreloadScanner { diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.cpp index 96dc2484b4b..71d5b6b14c8 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.cpp @@ -25,19 +25,20 @@ */ #include "config.h" -#include "core/html/parser/HTMLTreeBuilder.h" +#include "core/html/parser/HTMLConstructionSite.h" -#include "HTMLElementFactory.h" -#include "HTMLNames.h" +#include "core/HTMLElementFactory.h" +#include "core/HTMLNames.h" #include "core/dom/Comment.h" #include "core/dom/DocumentFragment.h" #include "core/dom/DocumentType.h" #include "core/dom/Element.h" #include "core/dom/ScriptLoader.h" #include "core/dom/Text.h" +#include "core/frame/LocalFrame.h" #include "core/html/HTMLFormElement.h" #include "core/html/HTMLHtmlElement.h" -#include "core/html/HTMLOptGroupElement.h" +#include "core/html/HTMLPlugInElement.h" #include "core/html/HTMLScriptElement.h" #include "core/html/HTMLTemplateElement.h" #include "core/html/parser/AtomicHTMLToken.h" @@ -46,7 +47,7 @@ #include "core/html/parser/HTMLToken.h" #include "core/loader/FrameLoader.h" #include "core/loader/FrameLoaderClient.h" -#include "core/frame/Frame.h" +#include "core/svg/SVGScriptElement.h" #include "platform/NotImplemented.h" #include "platform/text/TextBreakIterator.h" #include <limits> @@ -70,20 +71,20 @@ static bool hasImpliedEndTag(const HTMLStackItem* item) || item->hasTagName(dtTag) || item->hasTagName(liTag) || item->hasTagName(optionTag) - || isHTMLOptGroupElement(item->node()) + || item->hasTagName(optgroupTag) || item->hasTagName(pTag) || item->hasTagName(rpTag) || item->hasTagName(rtTag); } -static bool shouldUseLengthLimit(const ContainerNode* node) +static bool shouldUseLengthLimit(const ContainerNode& node) { - return !node->hasTagName(scriptTag) - && !node->hasTagName(styleTag) - && !node->hasTagName(SVGNames::scriptTag); + return !isHTMLScriptElement(node) + && !isHTMLStyleElement(node) + && !isSVGScriptElement(node); } -static unsigned textLengthLimitForContainer(const ContainerNode* node) +static unsigned textLengthLimitForContainer(const ContainerNode& node) { return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max(); } @@ -95,7 +96,7 @@ static inline bool isAllWhitespace(const String& string) static inline void insert(HTMLConstructionSiteTask& task) { - if (task.parent->hasTagName(templateTag)) + if (isHTMLTemplateElement(*task.parent)) task.parent = toHTMLTemplateElement(task.parent.get())->content(); if (ContainerNode* parent = task.child->parentNode()) @@ -113,10 +114,12 @@ static inline void executeInsertTask(HTMLConstructionSiteTask& task) insert(task); - task.child->beginParsingChildren(); - - if (task.selfClosing) - task.child->finishParsingChildren(); + if (task.child->isElementNode()) { + Element& child = toElement(*task.child); + child.beginParsingChildren(); + if (task.selfClosing) + child.finishParsingChildren(); + } } static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) @@ -130,7 +133,7 @@ static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild(); if (previousChild && previousChild->isTextNode()) { Text* previousText = toText(previousChild); - unsigned lengthLimit = textLengthLimitForContainer(task.parent.get()); + unsigned lengthLimit = textLengthLimitForContainer(*task.parent); if (previousText->length() + newText->length() < lengthLimit) { previousText->parserAppendData(newText->data()); return; @@ -238,7 +241,7 @@ void HTMLConstructionSite::flushPendingText() // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898 - unsigned lengthLimit = textLengthLimitForContainer(pendingText.parent.get()); + unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent); unsigned currentPosition = 0; const StringBuilder& string = pendingText.stringBuilder; @@ -269,10 +272,10 @@ void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) m_taskQueue.append(task); } -void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing) +void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing) { ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get()))); - ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement()); + ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild)); HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); task.parent = parent; @@ -343,14 +346,26 @@ HTMLConstructionSite::~HTMLConstructionSite() ASSERT(m_pendingText.isEmpty()); } +void HTMLConstructionSite::trace(Visitor* visitor) +{ + visitor->trace(m_document); + visitor->trace(m_attachmentRoot); + visitor->trace(m_head); + visitor->trace(m_form); + visitor->trace(m_openElements); + visitor->trace(m_activeFormattingElements); + visitor->trace(m_taskQueue); + visitor->trace(m_pendingText); +} + void HTMLConstructionSite::detach() { // FIXME: We'd like to ASSERT here that we're canceling and not just discarding // text that really should have made it into the DOM earlier, but there // doesn't seem to be a nice way to do that. m_pendingText.discard(); - m_document = 0; - m_attachmentRoot = 0; + m_document = nullptr; + m_attachmentRoot = nullptr; } void HTMLConstructionSite::setForm(HTMLFormElement* form) @@ -360,7 +375,7 @@ void HTMLConstructionSite::setForm(HTMLFormElement* form) m_form = form; } -PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm() +PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm() { return m_form.release(); } @@ -375,7 +390,7 @@ void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token) { ASSERT(m_document); - RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document); + RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document); setAttributes(element.get(), token, m_parserContentPolicy); attachLater(m_attachmentRoot, element); m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token)); @@ -392,7 +407,7 @@ void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* for (unsigned i = 0; i < token->attributes().size(); ++i) { const Attribute& tokenAttribute = token->attributes().at(i); - if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name())) + if (!element->elementData() || !element->findAttributeByName(tokenAttribute.name())) element->setAttribute(tokenAttribute.name(), tokenAttribute.value()); } } @@ -416,8 +431,6 @@ void HTMLConstructionSite::setDefaultCompatibilityMode() { if (m_isParsingFragment) return; - if (m_document->isSrcdocDocument()) - return; setCompatibilityMode(Document::QuirksMode); } @@ -536,7 +549,7 @@ void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier()); const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier()); - RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId); + RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId); attachLater(m_attachmentRoot, doctype.release()); // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which @@ -586,26 +599,26 @@ void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) { ASSERT(!shouldFosterParent()); - RefPtr<Element> body = createHTMLElement(token); + RefPtrWillBeRawPtr<Element> body = createHTMLElement(token); attachLater(currentNode(), body); m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token)); - if (Frame* frame = m_document->frame()) + if (LocalFrame* frame = m_document->frame()) frame->loader().client()->dispatchWillInsertBody(); } void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted) { - RefPtr<Element> element = createHTMLElement(token); - ASSERT(element->hasTagName(formTag)); + RefPtrWillBeRawPtr<Element> element = createHTMLElement(token); + ASSERT(isHTMLFormElement(element)); m_form = static_pointer_cast<HTMLFormElement>(element.release()); m_form->setDemoted(isDemoted); - attachLater(currentNode(), m_form); - m_openElements.push(HTMLStackItem::create(m_form, token)); + attachLater(currentNode(), m_form.get()); + m_openElements.push(HTMLStackItem::create(m_form.get(), token)); } void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) { - RefPtr<Element> element = createHTMLElement(token); + RefPtrWillBeRawPtr<Element> element = createHTMLElement(token); attachLater(currentNode(), element); m_openElements.push(HTMLStackItem::create(element.release(), token)); } @@ -639,7 +652,7 @@ void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) // those flags or effects thereof. const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted; const bool alreadyStarted = m_isParsingFragment && parserInserted; - RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted); + RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted); setAttributes(element.get(), token, m_parserContentPolicy); if (scriptingContentIsAllowed(m_parserContentPolicy)) attachLater(currentNode(), element); @@ -651,7 +664,7 @@ void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const At ASSERT(token->type() == HTMLToken::StartTag); notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. - RefPtr<Element> element = createElement(token, namespaceURI); + RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI); if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get())) attachLater(currentNode(), element, token->selfClosing()); if (!token->selfClosing()) @@ -667,7 +680,7 @@ void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode w findFosterSite(dummyTask); // FIXME: This probably doesn't need to be done both here and in insert(Task). - if (dummyTask.parent->hasTagName(templateTag)) + if (isHTMLTemplateElement(*dummyTask.parent)) dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content(); // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token. @@ -715,22 +728,22 @@ void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElement queueTask(task); } -PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) +PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) { QualifiedName tagName(nullAtom, token->name(), namespaceURI); - RefPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true); + RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true); setAttributes(element.get(), token, m_parserContentPolicy); return element.release(); } inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() { - if (currentNode()->hasTagName(templateTag)) + if (isHTMLTemplateElement(*currentNode())) return toHTMLTemplateElement(currentElement())->content()->document(); return currentNode()->document(); } -PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) +PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) { Document& document = ownerDocumentForCurrentNode(); // Only associate the element with the current form if we're creating the new element @@ -739,15 +752,15 @@ PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* tok // FIXME: This can't use HTMLConstructionSite::createElement because we // have to pass the current form element. We should rework form association // to occur after construction to allow better code sharing here. - RefPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true); + RefPtrWillBeRawPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true); setAttributes(element.get(), token, m_parserContentPolicy); ASSERT(element->isHTMLElement()); return element.release(); } -PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item) +PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item) { - RefPtr<Element> element; + RefPtrWillBeRawPtr<Element> element; // NOTE: Moving from item -> token -> item copies the Attribute vector twice! AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes()); if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI) @@ -784,7 +797,7 @@ void HTMLConstructionSite::reconstructTheActiveFormattingElements() ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex); - RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get()); + RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get()); attachLater(currentNode(), reconstructed->node()); m_openElements.push(reconstructed); unopenedEntry.replaceElement(reconstructed.release()); @@ -847,7 +860,7 @@ bool HTMLConstructionSite::shouldFosterParent() const && currentStackItem()->causesFosterParenting(); } -void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node) +void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node) { HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); findFosterSite(task); @@ -856,4 +869,11 @@ void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node) queueTask(task); } +void HTMLConstructionSite::PendingText::trace(Visitor* visitor) +{ + visitor->trace(parent); + visitor->trace(nextChild); +} + + } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.h index d4d812eb109..0bd25a0fa62 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.h @@ -27,6 +27,7 @@ #ifndef HTMLConstructionSite_h #define HTMLConstructionSite_h +#include "core/dom/Document.h" #include "core/dom/ParserContentPolicy.h" #include "core/html/parser/HTMLElementStack.h" #include "core/html/parser/HTMLFormattingElementList.h" @@ -39,6 +40,8 @@ namespace WebCore { struct HTMLConstructionSiteTask { + ALLOW_ONLY_INLINE_ALLOCATION(); +public: enum Operation { Insert, InsertText, // Handles possible merging of text nodes. @@ -53,6 +56,13 @@ struct HTMLConstructionSiteTask { { } + void trace(Visitor* visitor) + { + visitor->trace(parent); + visitor->trace(nextChild); + visitor->trace(child); + } + ContainerNode* oldParent() { // It's sort of ugly, but we store the |oldParent| in the |child| field @@ -62,17 +72,15 @@ struct HTMLConstructionSiteTask { } Operation operation; - RefPtr<ContainerNode> parent; - RefPtr<Node> nextChild; - RefPtr<Node> child; + RefPtrWillBeMember<ContainerNode> parent; + RefPtrWillBeMember<Node> nextChild; + RefPtrWillBeMember<Node> child; bool selfClosing; }; } // namespace WebCore -namespace WTF { -template<> struct VectorTraits<WebCore::HTMLConstructionSiteTask> : SimpleClassVectorTraits { }; -} // namespace WTF +WTF_ALLOW_MOVE_INIT_AND_COMPARE_WITH_MEM_FUNCTIONS(WebCore::HTMLConstructionSiteTask); namespace WebCore { @@ -89,12 +97,14 @@ class Document; class Element; class HTMLFormElement; -class HTMLConstructionSite { +class HTMLConstructionSite FINAL { WTF_MAKE_NONCOPYABLE(HTMLConstructionSite); + DISALLOW_ALLOCATION(); public: HTMLConstructionSite(Document*, ParserContentPolicy); HTMLConstructionSite(DocumentFragment*, ParserContentPolicy); ~HTMLConstructionSite(); + void trace(Visitor*); void detach(); @@ -150,10 +160,10 @@ public: void insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child); void takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent); - PassRefPtr<HTMLStackItem> createElementFromSavedToken(HTMLStackItem*); + PassRefPtrWillBeRawPtr<HTMLStackItem> createElementFromSavedToken(HTMLStackItem*); bool shouldFosterParent() const; - void fosterParent(PassRefPtr<Node>); + void fosterParent(PassRefPtrWillBeRawPtr<Node>); bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const; void reconstructTheActiveFormattingElements(); @@ -179,7 +189,7 @@ public: void setForm(HTMLFormElement*); HTMLFormElement* form() const { return m_form.get(); } - PassRefPtr<HTMLFormElement> takeForm(); + PassRefPtrWillBeRawPtr<HTMLFormElement> takeForm(); ParserContentPolicy parserContentPolicy() { return m_parserContentPolicy; } @@ -206,17 +216,17 @@ public: private: // In the common case, this queue will have only one task because most // tokens produce only one DOM mutation. - typedef Vector<HTMLConstructionSiteTask, 1> TaskQueue; + typedef WillBeHeapVector<HTMLConstructionSiteTask, 1> TaskQueue; void setCompatibilityMode(Document::CompatibilityMode); void setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId); - void attachLater(ContainerNode* parent, PassRefPtr<Node> child, bool selfClosing = false); + void attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> child, bool selfClosing = false); void findFosterSite(HTMLConstructionSiteTask&); - PassRefPtr<Element> createHTMLElement(AtomicHTMLToken*); - PassRefPtr<Element> createElement(AtomicHTMLToken*, const AtomicString& namespaceURI); + PassRefPtrWillBeRawPtr<Element> createHTMLElement(AtomicHTMLToken*); + PassRefPtrWillBeRawPtr<Element> createElement(AtomicHTMLToken*, const AtomicString& namespaceURI); void mergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*); void dispatchDocumentElementAvailableIfNeeded(); @@ -224,27 +234,29 @@ private: void executeTask(HTMLConstructionSiteTask&); void queueTask(const HTMLConstructionSiteTask&); - Document* m_document; + RawPtrWillBeMember<Document> m_document; // This is the root ContainerNode to which the parser attaches all newly // constructed nodes. It points to a DocumentFragment when parsing fragments // and a Document in all other cases. - ContainerNode* m_attachmentRoot; + RawPtrWillBeMember<ContainerNode> m_attachmentRoot; - RefPtr<HTMLStackItem> m_head; - RefPtr<HTMLFormElement> m_form; + RefPtrWillBeMember<HTMLStackItem> m_head; + RefPtrWillBeMember<HTMLFormElement> m_form; mutable HTMLElementStack m_openElements; mutable HTMLFormattingElementList m_activeFormattingElements; TaskQueue m_taskQueue; - struct PendingText { + class PendingText FINAL { + DISALLOW_ALLOCATION(); + public: PendingText() : whitespaceMode(WhitespaceUnknown) { } - void append(PassRefPtr<ContainerNode> newParent, PassRefPtr<Node> newNextChild, const String& newString, WhitespaceMode newWhitespaceMode) + void append(PassRefPtrWillBeRawPtr<ContainerNode> newParent, PassRefPtrWillBeRawPtr<Node> newNextChild, const String& newString, WhitespaceMode newWhitespaceMode) { ASSERT(!parent || parent == newParent); parent = newParent; @@ -277,8 +289,10 @@ private: return stringBuilder.isEmpty(); } - RefPtr<ContainerNode> parent; - RefPtr<Node> nextChild; + void trace(Visitor*); + + RefPtrWillBeMember<ContainerNode> parent; + RefPtrWillBeMember<Node> nextChild; StringBuilder stringBuilder; WhitespaceMode whitespaceMode; }; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.cpp index 2008ae5f0e1..e5a4d251b1c 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.cpp @@ -26,9 +26,11 @@ #include "config.h" #include "core/html/parser/HTMLDocumentParser.h" -#include "HTMLNames.h" +#include "core/HTMLNames.h" +#include "core/css/MediaValuesCached.h" #include "core/dom/DocumentFragment.h" #include "core/dom/Element.h" +#include "core/frame/LocalFrame.h" #include "core/html/HTMLDocument.h" #include "core/html/parser/AtomicHTMLToken.h" #include "core/html/parser/BackgroundHTMLParser.h" @@ -37,8 +39,11 @@ #include "core/html/parser/HTMLScriptRunner.h" #include "core/html/parser/HTMLTreeBuilder.h" #include "core/inspector/InspectorInstrumentation.h" -#include "core/frame/Frame.h" +#include "core/inspector/InspectorTraceEvents.h" +#include "core/loader/DocumentLoader.h" +#include "platform/SharedBuffer.h" #include "platform/TraceEvent.h" +#include "public/platform/WebThreadedDataReceiver.h" #include "wtf/Functional.h" namespace WebCore { @@ -70,17 +75,44 @@ static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElem return HTMLTokenizer::DataState; } -HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors) +class ParserDataReceiver : public blink::WebThreadedDataReceiver { +public: + explicit ParserDataReceiver(WeakPtr<BackgroundHTMLParser> backgroundParser) + : m_backgroundParser(backgroundParser) + { + } + + // WebThreadedDataReceiver + virtual void acceptData(const char* data, int dataLength) OVERRIDE FINAL + { + ASSERT(backgroundThread() && backgroundThread()->isCurrentThread()); + if (m_backgroundParser.get()) + m_backgroundParser.get()->appendRawBytesFromParserThread(data, dataLength); + } + + virtual blink::WebThread* backgroundThread() OVERRIDE FINAL + { + if (HTMLParserThread::shared()) + return &HTMLParserThread::shared()->platformThread(); + + return 0; + } + +private: + WeakPtr<BackgroundHTMLParser> m_backgroundParser; +}; + +HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document, bool reportErrors) : ScriptableDocumentParser(document) - , m_options(document) + , m_options(&document) , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken)) , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options)) - , m_scriptRunner(HTMLScriptRunner::create(document, this)) - , m_treeBuilder(HTMLTreeBuilder::create(this, document, parserContentPolicy(), reportErrors, m_options)) + , m_scriptRunner(HTMLScriptRunner::create(&document, this)) + , m_treeBuilder(HTMLTreeBuilder::create(this, &document, parserContentPolicy(), reportErrors, m_options)) , m_parserScheduler(HTMLParserScheduler::create(this)) - , m_xssAuditorDelegate(document) + , m_xssAuditorDelegate(&document) , m_weakFactory(this) - , m_preloader(adoptPtr(new HTMLResourcePreloader(document))) + , m_preloader(adoptPtr(new HTMLResourcePreloader(&document))) , m_isPinnedToMainThread(false) , m_endWasDelayed(false) , m_haveBackgroundParser(false) @@ -92,7 +124,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors // FIXME: Member variables should be grouped into self-initializing structs to // minimize code duplication between these constructors. HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) - : ScriptableDocumentParser(&fragment->document(), parserContentPolicy) + : ScriptableDocumentParser(fragment->document(), parserContentPolicy) , m_options(&fragment->document()) , m_token(adoptPtr(new HTMLToken)) , m_tokenizer(HTMLTokenizer::create(m_options)) @@ -112,6 +144,12 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont HTMLDocumentParser::~HTMLDocumentParser() { +#if ENABLE(OILPAN) + if (m_haveBackgroundParser) + stopBackgroundParser(); + // In Oilpan, HTMLDocumentParser can die together with Document, and + // detach() is not called in this case. +#else ASSERT(!m_parserScheduler); ASSERT(!m_pumpSessionNestingLevel); ASSERT(!m_preloadScanner); @@ -120,6 +158,15 @@ HTMLDocumentParser::~HTMLDocumentParser() // FIXME: We should be able to ASSERT(m_speculations.isEmpty()), // but there are cases where that's not true currently. For example, // we we're told to stop parsing before we've consumed all the input. +#endif +} + +void HTMLDocumentParser::trace(Visitor* visitor) +{ + visitor->trace(m_treeBuilder); + visitor->trace(m_scriptRunner); + ScriptableDocumentParser::trace(visitor); + HTMLScriptRunnerHost::trace(visitor); } void HTMLDocumentParser::pinToMainThread() @@ -167,7 +214,7 @@ void HTMLDocumentParser::prepareToStopParsing() // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); // NOTE: This pump should only ever emit buffered character tokens, // so ForceSynchronous vs. AllowYield should be meaningless. @@ -230,7 +277,7 @@ void HTMLDocumentParser::resumeParsingAfterYield() ASSERT(!m_isPinnedToMainThread); // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); if (m_haveBackgroundParser) { pumpPendingSpeculations(); @@ -248,7 +295,7 @@ void HTMLDocumentParser::runScriptsForPausedTreeBuilder() ASSERT(scriptingContentIsAllowed(parserContentPolicy())); TextPosition scriptStartPosition = TextPosition::belowRangePosition(); - RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition); + RefPtrWillBeRawPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition); // We will not have a scriptRunner when parsing a DocumentFragment. if (m_scriptRunner) m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); @@ -263,7 +310,7 @@ bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& ses if (isWaitingForScripts()) { if (mode == AllowYield) - m_parserScheduler->checkForYieldBeforeScript(session); + session.didSeeScript = true; // If we don't run the script, we cannot allow the next token to be taken. if (session.needsYield) @@ -278,7 +325,7 @@ bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& ses } // FIXME: It's wrong for the HTMLDocumentParser to reach back to the - // Frame, but this approach is how the old parser handled + // LocalFrame, but this approach is how the old parser handled // stopping when the page assigns window.location. What really // should happen is that assigning window.location causes the // parser to stop parsing cleanly. The problem is we're not @@ -308,7 +355,7 @@ void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<Pa // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); ASSERT(m_speculations.isEmpty()); chunk->preloads.clear(); // We don't need to preload because we're going to parse immediately. @@ -316,6 +363,11 @@ void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<Pa pumpPendingSpeculations(); } +void HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData& data) +{ + document()->setEncodingData(data); +} + void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk) { ASSERT(chunk); @@ -382,8 +434,10 @@ void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<Parse ASSERT(!isParsingFragment()); ASSERT(!isWaitingForScripts()); ASSERT(!isStopped()); +#if !ENABLE(OILPAN) // ASSERT that this object is both attached to the Document and protected. ASSERT(refCount() >= 2); +#endif ASSERT(shouldUseThreading()); ASSERT(!m_tokenizer); ASSERT(!m_token); @@ -406,8 +460,7 @@ void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<Parse for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) { ASSERT(!isWaitingForScripts()); - if (!isParsingFragment() - && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) { + if (document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) { // To match main-thread parser behavior (which never checks locationChangePending on the EOF path) // we peek to see if this chunk has an EOF and process it anyway. @@ -442,6 +495,10 @@ void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<Parse ASSERT(!m_tokenizer); ASSERT(!m_token); } + + // Make sure any pending text nodes are emitted before returning. + if (!isStopped()) + m_treeBuilder->flush(); } void HTMLDocumentParser::pumpPendingSpeculations() @@ -449,8 +506,10 @@ void HTMLDocumentParser::pumpPendingSpeculations() // FIXME: Share this constant with the parser scheduler. const double parserTimeLimit = 0.500; +#if !ENABLE(OILPAN) // ASSERT that this object is both attached to the Document and protected. ASSERT(refCount() >= 2); +#endif // If this assert fails, you need to call validateSpeculations to make sure // m_tokenizer and m_token don't have state that invalidates m_speculations. ASSERT(!m_tokenizer); @@ -460,6 +519,9 @@ void HTMLDocumentParser::pumpPendingSpeculations() ASSERT(!isStopped()); // FIXME: Pass in current input length. + TRACE_EVENT_BEGIN1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), lineNumber().zeroBasedInt())); + TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline.stack"), "CallStack", "stack", InspectorCallStackEvent::currentCallStack()); + // FIXME(361045): remove InspectorInstrumentation calls once DevTools Timeline migrates to tracing. InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt()); double startTime = currentTime(); @@ -467,11 +529,8 @@ void HTMLDocumentParser::pumpPendingSpeculations() while (!m_speculations.isEmpty()) { processParsedChunkFromBackgroundParser(m_speculations.takeFirst()); - // The order matters! If this isStopped(), isWaitingForScripts() can hit and ASSERT since - // m_document can be null which is used to decide the readiness. - if (isStopped()) - break; - if (isWaitingForScripts()) + // Always check isStopped first as m_document may be null. + if (isStopped() || isWaitingForScripts()) break; if (currentTime() - startTime > parserTimeLimit && !m_speculations.isEmpty()) { @@ -480,7 +539,10 @@ void HTMLDocumentParser::pumpPendingSpeculations() } } + TRACE_EVENT_END1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "ParseHTML", "endLine", lineNumber().zeroBasedInt()); + // FIXME(361045): remove InspectorInstrumentation calls once DevTools Timeline migrates to tracing. InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt()); + TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "UpdateCounters", "data", InspectorUpdateCountersEvent::data()); } void HTMLDocumentParser::forcePlaintextForTextDocument() @@ -505,12 +567,22 @@ Document* HTMLDocumentParser::contextForParsingSession() return document(); } +static PassRefPtr<MediaValues> createMediaValues(Document* document) +{ + ASSERT(document); + RefPtr<MediaValues> mediaValues = MediaValuesCached::create(*document); + ASSERT(mediaValues->isSafeToSendToAnotherThread()); + return mediaValues; +} + void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) { ASSERT(!isStopped()); ASSERT(!isScheduledForResume()); +#if !ENABLE(OILPAN) // ASSERT that this object is both attached to the Document and protected. ASSERT(refCount() >= 2); +#endif ASSERT(m_tokenizer); ASSERT(m_token); ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); @@ -522,6 +594,9 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) // FIXME: m_input.current().length() is only accurate if we // end up parsing the whole buffer in this pump. We should pass how // much we parsed as part of didWriteHTML instead of willWriteHTML. + TRACE_EVENT_BEGIN1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), m_input.current().currentLine().zeroBasedInt())); + TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline.stack"), "CallStack", "stack", InspectorCallStackEvent::currentCallStack()); + // FIXME(361045): remove InspectorInstrumentation calls once DevTools Timeline migrates to tracing. InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt()); m_xssAuditor.init(document(), &m_xssAuditorDelegate); @@ -546,9 +621,11 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) ASSERT(token().isUninitialized()); } +#if !ENABLE(OILPAN) // Ensure we haven't been totally deref'ed after pumping. Any caller of this // function should be holding a RefPtr to this to ensure we weren't deleted. ASSERT(refCount() >= 1); +#endif if (isStopped()) return; @@ -565,12 +642,14 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) if (isWaitingForScripts()) { ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); if (!m_preloadScanner) { - m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio())); + m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), createMediaValues(document()))); m_preloadScanner->appendToEnd(m_input.current()); } m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } + TRACE_EVENT_END1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "ParseHTML", "endLine", m_input.current().currentLine().zeroBasedInt()); + // FIXME(361045): remove InspectorInstrumentation calls once DevTools Timeline migrates to tracing. InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt()); } @@ -620,11 +699,11 @@ void HTMLDocumentParser::insert(const SegmentedString& source) if (isStopped()) return; - TRACE_EVENT0("webkit", "HTMLDocumentParser::insert"); + TRACE_EVENT1("webkit", "HTMLDocumentParser::insert", "source_length", source.length()); // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); if (!m_tokenizer) { ASSERT(!inPumpSession()); @@ -642,7 +721,7 @@ void HTMLDocumentParser::insert(const SegmentedString& source) // Check the document.write() output with a separate preload scanner as // the main scanner can't deal with insertions. if (!m_insertionPreloadScanner) - m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio())); + m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), createMediaValues(document()))); m_insertionPreloadScanner->appendToEnd(source); m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL()); @@ -653,6 +732,7 @@ void HTMLDocumentParser::insert(const SegmentedString& source) void HTMLDocumentParser::startBackgroundParser() { + ASSERT(!isStopped()); ASSERT(shouldUseThreading()); ASSERT(!m_haveBackgroundParser); m_haveBackgroundParser = true; @@ -660,16 +740,21 @@ void HTMLDocumentParser::startBackgroundParser() RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound(); m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference); + // TODO(oysteine): Disabled due to crbug.com/398076 until a full fix can be implemented. + if (RuntimeEnabledFeatures::threadedParserDataReceiverEnabled()) + document()->loader()->attachThreadedDataReceiver(adoptPtr(new ParserDataReceiver(m_backgroundParser))); + OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration); config->options = m_options; config->parser = m_weakFactory.createWeakPtr(); config->xssAuditor = adoptPtr(new XSSAuditor); config->xssAuditor->init(document(), &m_xssAuditorDelegate); - config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy(), document()->devicePixelRatio())); + config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy(), createMediaValues(document()))); + config->decoder = takeDecoder(); ASSERT(config->xssAuditor->isSafeToSendToAnotherThread()); ASSERT(config->preloadScanner->isSafeToSendToAnotherThread()); - HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::create, reference.release(), config.release())); + HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::start, reference.release(), config.release())); } void HTMLDocumentParser::stopBackgroundParser() @@ -687,23 +772,13 @@ void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) if (isStopped()) return; - if (shouldUseThreading()) { - if (!m_haveBackgroundParser) - startBackgroundParser(); - - ASSERT(inputSource->hasOneRef()); - TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length()); - // NOTE: Important that the String temporary is destroyed before we post the task - // otherwise the String could call deref() on a StringImpl now owned by the background parser. - // We would like to ASSERT(closure.arg3()->hasOneRef()) but sadly the args are private. - Closure closure = bind(&BackgroundHTMLParser::append, m_backgroundParser, String(inputSource)); - HTMLParserThread::shared()->postTask(closure); - return; - } + // We should never reach this point if we're using a parser thread, + // as appendBytes() will directly ship the data to the thread. + ASSERT(!shouldUseThreading()); // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length()); String source(inputSource); @@ -876,7 +951,7 @@ void HTMLDocumentParser::resumeParsingAfterScriptExecution() ASSERT(!m_lastChunkBeforeScript); // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); pumpPendingSpeculations(); return; } @@ -886,20 +961,6 @@ void HTMLDocumentParser::resumeParsingAfterScriptExecution() endIfDelayed(); } -void HTMLDocumentParser::watchForLoad(Resource* resource) -{ - ASSERT(!resource->isLoaded()); - // addClient would call notifyFinished if the load were complete. - // Callers do not expect to be re-entered from this call, so they should - // not an already-loaded Resource. - resource->addClient(this); -} - -void HTMLDocumentParser::stopWatchingForLoad(Resource* resource) -{ - resource->removeClient(this); -} - void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan() { ASSERT(m_preloadScanner); @@ -907,11 +968,11 @@ void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan() m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } -void HTMLDocumentParser::notifyFinished(Resource* cachedResource) +void HTMLDocumentParser::notifyScriptLoaded(Resource* cachedResource) { // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); ASSERT(m_scriptRunner); ASSERT(!isExecutingScript()); @@ -938,7 +999,7 @@ void HTMLDocumentParser::executeScriptsWaitingForResources() // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. - RefPtr<HTMLDocumentParser> protect(this); + RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this); m_scriptRunner->executeScriptsWaitingForResources(); if (!isWaitingForScripts()) resumeParsingAfterScriptExecution(); @@ -946,7 +1007,7 @@ void HTMLDocumentParser::executeScriptsWaitingForResources() void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) { - RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); + RefPtrWillBeRawPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); parser->insert(source); // Use insert() so that the parser will not yield. parser->finish(); ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> @@ -965,4 +1026,45 @@ void HTMLDocumentParser::resumeScheduledTasks() m_parserScheduler->resume(); } +void HTMLDocumentParser::appendBytes(const char* data, size_t length) +{ + if (!length || isStopped()) + return; + + if (shouldUseThreading()) { + if (!m_haveBackgroundParser) + startBackgroundParser(); + + OwnPtr<Vector<char> > buffer = adoptPtr(new Vector<char>(length)); + memcpy(buffer->data(), data, length); + TRACE_EVENT1("net", "HTMLDocumentParser::appendBytes", "size", (unsigned)length); + + HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::appendRawBytesFromMainThread, m_backgroundParser, buffer.release())); + return; + } + + DecodedDataDocumentParser::appendBytes(data, length); +} + +void HTMLDocumentParser::flush() +{ + // If we've got no decoder, we never received any data. + if (isDetached() || needsDecoder()) + return; + + if (m_haveBackgroundParser) + HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::flush, m_backgroundParser)); + else + DecodedDataDocumentParser::flush(); +} + +void HTMLDocumentParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder) +{ + ASSERT(decoder); + DecodedDataDocumentParser::setDecoder(decoder); + + if (m_haveBackgroundParser) + HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::setDecoder, m_backgroundParser, takeDecoder())); +} + } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.h index 40779561ad3..227cf522507 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLDocumentParser.h @@ -40,6 +40,7 @@ #include "core/html/parser/HTMLToken.h" #include "core/html/parser/HTMLTokenizer.h" #include "core/html/parser/HTMLTreeBuilderSimulator.h" +#include "core/html/parser/TextResourceDecoder.h" #include "core/html/parser/XSSAuditor.h" #include "core/html/parser/XSSAuditorDelegate.h" #include "platform/text/SegmentedString.h" @@ -64,14 +65,16 @@ class ScriptSourceCode; class PumpSession; -class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient { - WTF_MAKE_FAST_ALLOCATED; +class HTMLDocumentParser : public ScriptableDocumentParser, private HTMLScriptRunnerHost { + WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; + WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser); public: - static PassRefPtr<HTMLDocumentParser> create(HTMLDocument* document, bool reportErrors) + static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(HTMLDocument& document, bool reportErrors) { - return adoptRef(new HTMLDocumentParser(document, reportErrors)); + return adoptRefWillBeNoop(new HTMLDocumentParser(document, reportErrors)); } virtual ~HTMLDocumentParser(); + virtual void trace(Visitor*) OVERRIDE; // Exposed for HTMLParserScheduler void resumeParsingAfterYield(); @@ -80,11 +83,11 @@ public: HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } - virtual TextPosition textPosition() const; - virtual OrdinalNumber lineNumber() const; + virtual TextPosition textPosition() const OVERRIDE FINAL; + virtual OrdinalNumber lineNumber() const OVERRIDE FINAL; - virtual void suspendScheduledTasks(); - virtual void resumeScheduledTasks(); + virtual void suspendScheduledTasks() OVERRIDE FINAL; + virtual void resumeScheduledTasks() OVERRIDE FINAL; struct ParsedChunk { OwnPtr<CompactHTMLTokenStream> tokens; @@ -96,15 +99,20 @@ public: TokenPreloadScannerCheckpoint preloadScannerCheckpoint; }; void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); + void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&); + + virtual void appendBytes(const char* bytes, size_t length) OVERRIDE; + virtual void flush() OVERRIDE FINAL; + virtual void setDecoder(PassOwnPtr<TextResourceDecoder>) OVERRIDE FINAL; UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); } protected: - virtual void insert(const SegmentedString&) OVERRIDE; + virtual void insert(const SegmentedString&) OVERRIDE FINAL; virtual void append(PassRefPtr<StringImpl>) OVERRIDE; - virtual void finish() OVERRIDE; + virtual void finish() OVERRIDE FINAL; - HTMLDocumentParser(HTMLDocument*, bool reportErrors); + HTMLDocumentParser(HTMLDocument&, bool reportErrors); HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy); HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } @@ -112,31 +120,27 @@ protected: void forcePlaintextForTextDocument(); private: - static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) + static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) { - return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); + return adoptRefWillBeNoop(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); } // DocumentParser - virtual void pinToMainThread() OVERRIDE; - virtual void detach() OVERRIDE; - virtual bool hasInsertionPoint() OVERRIDE; - virtual bool processingData() const OVERRIDE; - virtual void prepareToStopParsing() OVERRIDE; - virtual void stopParsing() OVERRIDE; - virtual bool isWaitingForScripts() const OVERRIDE; - virtual bool isExecutingScript() const OVERRIDE; - virtual void executeScriptsWaitingForResources() OVERRIDE; + virtual void pinToMainThread() OVERRIDE FINAL; + virtual void detach() OVERRIDE FINAL; + virtual bool hasInsertionPoint() OVERRIDE FINAL; + virtual bool processingData() const OVERRIDE FINAL; + virtual void prepareToStopParsing() OVERRIDE FINAL; + virtual void stopParsing() OVERRIDE FINAL; + virtual bool isWaitingForScripts() const OVERRIDE FINAL; + virtual bool isExecutingScript() const OVERRIDE FINAL; + virtual void executeScriptsWaitingForResources() OVERRIDE FINAL; // HTMLScriptRunnerHost - virtual void watchForLoad(Resource*) OVERRIDE; - virtual void stopWatchingForLoad(Resource*) OVERRIDE; - virtual HTMLInputStream& inputStream() { return m_input; } - virtual bool hasPreloadScanner() const { return m_preloadScanner.get() && !shouldUseThreading(); } - virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE; - - // ResourceClient - virtual void notifyFinished(Resource*); + virtual void notifyScriptLoaded(Resource*) OVERRIDE FINAL; + virtual HTMLInputStream& inputStream() OVERRIDE FINAL { return m_input; } + virtual bool hasPreloadScanner() const OVERRIDE FINAL { return m_preloadScanner.get() && !shouldUseThreading(); } + virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE FINAL; void startBackgroundParser(); void stopBackgroundParser(); @@ -179,8 +183,8 @@ private: OwnPtr<HTMLToken> m_token; OwnPtr<HTMLTokenizer> m_tokenizer; - OwnPtr<HTMLScriptRunner> m_scriptRunner; - OwnPtr<HTMLTreeBuilder> m_treeBuilder; + OwnPtrWillBeMember<HTMLScriptRunner> m_scriptRunner; + OwnPtrWillBeMember<HTMLTreeBuilder> m_treeBuilder; OwnPtr<HTMLPreloadScanner> m_preloadScanner; OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner; OwnPtr<HTMLParserScheduler> m_parserScheduler; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.cpp index 9adfdf1612c..5ea0549dbae 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.cpp @@ -27,13 +27,11 @@ #include "config.h" #include "core/html/parser/HTMLElementStack.h" -#include "HTMLNames.h" -#include "MathMLNames.h" -#include "SVGNames.h" +#include "core/HTMLNames.h" +#include "core/MathMLNames.h" +#include "core/SVGNames.h" #include "core/dom/Element.h" -#include "core/html/HTMLHtmlElement.h" -#include "core/html/HTMLOptGroupElement.h" -#include "core/html/HTMLTableElement.h" +#include "core/html/HTMLElement.h" namespace WebCore { @@ -45,7 +43,7 @@ namespace { inline bool isRootNode(HTMLStackItem* item) { return item->isDocumentFragmentNode() - || isHTMLHtmlElement(item->node()); + || item->hasTagName(htmlTag); } inline bool isScopeMarker(HTMLStackItem* item) @@ -54,7 +52,7 @@ inline bool isScopeMarker(HTMLStackItem* item) || item->hasTagName(captionTag) || item->hasTagName(marqueeTag) || item->hasTagName(objectTag) - || isHTMLTableElement(item->node()) + || item->hasTagName(tableTag) || item->hasTagName(tdTag) || item->hasTagName(thTag) || item->hasTagName(MathMLNames::miTag) @@ -79,7 +77,7 @@ inline bool isListItemScopeMarker(HTMLStackItem* item) inline bool isTableScopeMarker(HTMLStackItem* item) { - return isHTMLTableElement(item->node()) + return item->hasTagName(tableTag) || item->hasTagName(templateTag) || isRootNode(item); } @@ -115,24 +113,26 @@ inline bool isButtonScopeMarker(HTMLStackItem* item) inline bool isSelectScopeMarker(HTMLStackItem* item) { - return !isHTMLOptGroupElement(item->node()) + return !item->hasTagName(optgroupTag) && !item->hasTagName(optionTag); } } -HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<HTMLStackItem> item, PassOwnPtr<ElementRecord> next) +HTMLElementStack::ElementRecord::ElementRecord(PassRefPtrWillBeRawPtr<HTMLStackItem> item, PassOwnPtrWillBeRawPtr<ElementRecord> next) : m_item(item) , m_next(next) { ASSERT(m_item); } +#if !ENABLE(OILPAN) HTMLElementStack::ElementRecord::~ElementRecord() { } +#endif -void HTMLElementStack::ElementRecord::replaceElement(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::ElementRecord::replaceElement(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { ASSERT(item); ASSERT(!m_item || m_item->isElementNode()); @@ -149,10 +149,18 @@ bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const return false; } +void HTMLElementStack::ElementRecord::trace(Visitor* visitor) +{ +#if ENABLE(OILPAN) + visitor->trace(m_item); + visitor->trace(m_next); +#endif +} + HTMLElementStack::HTMLElementStack() - : m_rootNode(0) - , m_headElement(0) - , m_bodyElement(0) + : m_rootNode(nullptr) + , m_headElement(nullptr) + , m_bodyElement(nullptr) , m_stackDepth(0) { } @@ -181,25 +189,27 @@ bool HTMLElementStack::secondElementIsHTMLBodyElement() const void HTMLElementStack::popHTMLHeadElement() { ASSERT(top() == m_headElement); - m_headElement = 0; + m_headElement = nullptr; popCommon(); } void HTMLElementStack::popHTMLBodyElement() { ASSERT(top() == m_bodyElement); - m_bodyElement = 0; + m_bodyElement = nullptr; popCommon(); } void HTMLElementStack::popAll() { - m_rootNode = 0; - m_headElement = 0; - m_bodyElement = 0; + m_rootNode = nullptr; + m_headElement = nullptr; + m_bodyElement = nullptr; m_stackDepth = 0; while (m_top) { - topNode()->finishParsingChildren(); + Node& node = *topNode(); + if (node.isElementNode()) + toElement(node).finishParsingChildren(); m_top = m_top->releaseNext(); } } @@ -301,19 +311,19 @@ void HTMLElementStack::popUntilForeignContentScopeMarker() pop(); } -void HTMLElementStack::pushRootNode(PassRefPtr<HTMLStackItem> rootItem) +void HTMLElementStack::pushRootNode(PassRefPtrWillBeRawPtr<HTMLStackItem> rootItem) { ASSERT(rootItem->isDocumentFragmentNode()); pushRootNodeCommon(rootItem); } -void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::pushHTMLHtmlElement(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { - ASSERT(isHTMLHtmlElement(item->node())); + ASSERT(item->hasTagName(htmlTag)); pushRootNodeCommon(item); } -void HTMLElementStack::pushRootNodeCommon(PassRefPtr<HTMLStackItem> rootItem) +void HTMLElementStack::pushRootNodeCommon(PassRefPtrWillBeRawPtr<HTMLStackItem> rootItem) { ASSERT(!m_top); ASSERT(!m_rootNode); @@ -321,7 +331,7 @@ void HTMLElementStack::pushRootNodeCommon(PassRefPtr<HTMLStackItem> rootItem) pushCommon(rootItem); } -void HTMLElementStack::pushHTMLHeadElement(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::pushHTMLHeadElement(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { ASSERT(item->hasTagName(HTMLNames::headTag)); ASSERT(!m_headElement); @@ -329,7 +339,7 @@ void HTMLElementStack::pushHTMLHeadElement(PassRefPtr<HTMLStackItem> item) pushCommon(item); } -void HTMLElementStack::pushHTMLBodyElement(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::pushHTMLBodyElement(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { ASSERT(item->hasTagName(HTMLNames::bodyTag)); ASSERT(!m_bodyElement); @@ -337,23 +347,23 @@ void HTMLElementStack::pushHTMLBodyElement(PassRefPtr<HTMLStackItem> item) pushCommon(item); } -void HTMLElementStack::push(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::push(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { - ASSERT(!isHTMLHtmlElement(item->node())); - ASSERT(!item->hasTagName(HTMLNames::headTag)); - ASSERT(!item->hasTagName(HTMLNames::bodyTag)); + ASSERT(!item->hasTagName(htmlTag)); + ASSERT(!item->hasTagName(headTag)); + ASSERT(!item->hasTagName(bodyTag)); ASSERT(m_rootNode); pushCommon(item); } -void HTMLElementStack::insertAbove(PassRefPtr<HTMLStackItem> item, ElementRecord* recordBelow) +void HTMLElementStack::insertAbove(PassRefPtrWillBeRawPtr<HTMLStackItem> item, ElementRecord* recordBelow) { ASSERT(item); ASSERT(recordBelow); ASSERT(m_top); - ASSERT(!isHTMLHtmlElement(item->node())); - ASSERT(!item->hasTagName(HTMLNames::headTag)); - ASSERT(!item->hasTagName(HTMLNames::bodyTag)); + ASSERT(!item->hasTagName(htmlTag)); + ASSERT(!item->hasTagName(headTag)); + ASSERT(!item->hasTagName(bodyTag)); ASSERT(m_rootNode); if (recordBelow == m_top) { push(item); @@ -365,7 +375,7 @@ void HTMLElementStack::insertAbove(PassRefPtr<HTMLStackItem> item, ElementRecord continue; m_stackDepth++; - recordAbove->setNext(adoptPtr(new ElementRecord(item, recordAbove->releaseNext()))); + recordAbove->setNext(adoptPtrWillBeNoop(new ElementRecord(item, recordAbove->releaseNext()))); recordAbove->next()->element()->beginParsingChildren(); return; } @@ -395,13 +405,13 @@ void HTMLElementStack::removeHTMLHeadElement(Element* element) popHTMLHeadElement(); return; } - m_headElement = 0; + m_headElement = nullptr; removeNonTopCommon(element); } void HTMLElementStack::remove(Element* element) { - ASSERT(!element->hasTagName(HTMLNames::headTag)); + ASSERT(!isHTMLHeadElement(element)); if (m_top->element() == element) { pop(); return; @@ -556,19 +566,19 @@ ContainerNode* HTMLElementStack::rootNode() const return m_rootNode; } -void HTMLElementStack::pushCommon(PassRefPtr<HTMLStackItem> item) +void HTMLElementStack::pushCommon(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { ASSERT(m_rootNode); m_stackDepth++; - m_top = adoptPtr(new ElementRecord(item, m_top.release())); + m_top = adoptPtrWillBeNoop(new ElementRecord(item, m_top.release())); } void HTMLElementStack::popCommon() { - ASSERT(!isHTMLHtmlElement(topStackItem()->node())); - ASSERT(!topStackItem()->hasTagName(HTMLNames::headTag) || !m_headElement); - ASSERT(!topStackItem()->hasTagName(HTMLNames::bodyTag) || !m_bodyElement); + ASSERT(!topStackItem()->hasTagName(htmlTag)); + ASSERT(!topStackItem()->hasTagName(headTag) || !m_headElement); + ASSERT(!topStackItem()->hasTagName(bodyTag) || !m_bodyElement); top()->finishParsingChildren(); m_top = m_top->releaseNext(); @@ -578,7 +588,7 @@ void HTMLElementStack::popCommon() void HTMLElementStack::removeNonTopCommon(Element* element) { ASSERT(!isHTMLHtmlElement(element)); - ASSERT(!element->hasTagName(HTMLNames::bodyTag)); + ASSERT(!isHTMLBodyElement(element)); ASSERT(top() != element); for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) { if (pos->next()->element() == element) { @@ -606,6 +616,14 @@ HTMLElementStack::ElementRecord* HTMLElementStack::furthestBlockForFormattingEle return 0; } +void HTMLElementStack::trace(Visitor* visitor) +{ + visitor->trace(m_top); + visitor->trace(m_rootNode); + visitor->trace(m_headElement); + visitor->trace(m_bodyElement); +} + #ifndef NDEBUG void HTMLElementStack::show() diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.h index e63bbbc3e72..2afead5f97e 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLElementStack.h @@ -44,35 +44,40 @@ class QualifiedName; // NOTE: The HTML5 spec uses a backwards (grows downward) stack. We're using // more standard (grows upwards) stack terminology here. class HTMLElementStack { - WTF_MAKE_NONCOPYABLE(HTMLElementStack); WTF_MAKE_FAST_ALLOCATED; + WTF_MAKE_NONCOPYABLE(HTMLElementStack); + DISALLOW_ALLOCATION(); public: HTMLElementStack(); ~HTMLElementStack(); - class ElementRecord { - WTF_MAKE_NONCOPYABLE(ElementRecord); WTF_MAKE_FAST_ALLOCATED; + class ElementRecord FINAL : public NoBaseWillBeGarbageCollected<ElementRecord> { + WTF_MAKE_NONCOPYABLE(ElementRecord); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; public: +#if !ENABLE(OILPAN) ~ElementRecord(); // Public for ~PassOwnPtr() +#endif Element* element() const { return m_item->element(); } ContainerNode* node() const { return m_item->node(); } const AtomicString& namespaceURI() const { return m_item->namespaceURI(); } - PassRefPtr<HTMLStackItem> stackItem() const { return m_item; } - void replaceElement(PassRefPtr<HTMLStackItem>); + PassRefPtrWillBeRawPtr<HTMLStackItem> stackItem() const { return m_item; } + void replaceElement(PassRefPtrWillBeRawPtr<HTMLStackItem>); bool isAbove(ElementRecord*) const; ElementRecord* next() const { return m_next.get(); } + + void trace(Visitor*); private: friend class HTMLElementStack; - ElementRecord(PassRefPtr<HTMLStackItem>, PassOwnPtr<ElementRecord>); + ElementRecord(PassRefPtrWillBeRawPtr<HTMLStackItem>, PassOwnPtrWillBeRawPtr<ElementRecord>); - PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); } - void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; } + PassOwnPtrWillBeRawPtr<ElementRecord> releaseNext() { return m_next.release(); } + void setNext(PassOwnPtrWillBeRawPtr<ElementRecord> next) { m_next = next; } - RefPtr<HTMLStackItem> m_item; - OwnPtr<ElementRecord> m_next; + RefPtrWillBeMember<HTMLStackItem> m_item; + OwnPtrWillBeMember<ElementRecord> m_next; }; unsigned stackDepth() const { return m_stackDepth; } @@ -103,13 +108,13 @@ public: ElementRecord* furthestBlockForFormattingElement(Element*) const; ElementRecord* topmost(const AtomicString& tagName) const; - void insertAbove(PassRefPtr<HTMLStackItem>, ElementRecord*); + void insertAbove(PassRefPtrWillBeRawPtr<HTMLStackItem>, ElementRecord*); - void push(PassRefPtr<HTMLStackItem>); - void pushRootNode(PassRefPtr<HTMLStackItem>); - void pushHTMLHtmlElement(PassRefPtr<HTMLStackItem>); - void pushHTMLHeadElement(PassRefPtr<HTMLStackItem>); - void pushHTMLBodyElement(PassRefPtr<HTMLStackItem>); + void push(PassRefPtrWillBeRawPtr<HTMLStackItem>); + void pushRootNode(PassRefPtrWillBeRawPtr<HTMLStackItem>); + void pushHTMLHtmlElement(PassRefPtrWillBeRawPtr<HTMLStackItem>); + void pushHTMLHeadElement(PassRefPtrWillBeRawPtr<HTMLStackItem>); + void pushHTMLBodyElement(PassRefPtrWillBeRawPtr<HTMLStackItem>); void pop(); void popUntil(const AtomicString& tagName); @@ -159,26 +164,28 @@ public: ContainerNode* rootNode() const; + void trace(Visitor*); + #ifndef NDEBUG void show(); #endif private: - void pushCommon(PassRefPtr<HTMLStackItem>); - void pushRootNodeCommon(PassRefPtr<HTMLStackItem>); + void pushCommon(PassRefPtrWillBeRawPtr<HTMLStackItem>); + void pushRootNodeCommon(PassRefPtrWillBeRawPtr<HTMLStackItem>); void popCommon(); void removeNonTopCommon(Element*); - OwnPtr<ElementRecord> m_top; + OwnPtrWillBeMember<ElementRecord> m_top; // We remember the root node, <head> and <body> as they are pushed. Their // ElementRecords keep them alive. The root node is never popped. // FIXME: We don't currently require type-specific information about // these elements so we haven't yet bothered to plumb the types all the // way down through createElement, etc. - ContainerNode* m_rootNode; - Element* m_headElement; - Element* m_bodyElement; + RawPtrWillBeMember<ContainerNode> m_rootNode; + RawPtrWillBeMember<Element> m_headElement; + RawPtrWillBeMember<Element> m_bodyElement; unsigned m_stackDepth; }; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityParser.cpp index 97cebee4383..9866313277d 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityParser.cpp @@ -115,7 +115,7 @@ static bool consumeNamedEntity(SegmentedString& source, DecodedHTMLEntity& decod } notEnoughCharacters = source.isEmpty(); if (notEnoughCharacters) { - // We can't an entity because there might be a longer entity + // We can't decide on an entity because there might be a longer entity // that we could match if we had more data. unconsumeCharacters(source, consumedCharacters); return false; @@ -130,11 +130,12 @@ static bool consumeNamedEntity(SegmentedString& source, DecodedHTMLEntity& decod // actual entity. unconsumeCharacters(source, consumedCharacters); consumedCharacters.clear(); - const int length = entitySearch.mostRecentMatch()->length; - const UChar* reference = entitySearch.mostRecentMatch()->entity; + const HTMLEntityTableEntry* mostRecent = entitySearch.mostRecentMatch(); + const int length = mostRecent->length; + const LChar* reference = HTMLEntityTable::entityString(*mostRecent); for (int i = 0; i < length; ++i) { cc = source.currentChar(); - ASSERT_UNUSED(reference, cc == *reference++); + ASSERT_UNUSED(reference, cc == static_cast<UChar>(*reference++)); consumedCharacters.append(cc); source.advanceAndASSERT(cc); ASSERT(!source.isEmpty()); diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntitySearch.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntitySearch.cpp index c4ef2b0c92f..fe847e48038 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntitySearch.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntitySearch.cpp @@ -47,7 +47,8 @@ HTMLEntitySearch::CompareResult HTMLEntitySearch::compare(const HTMLEntityTableE { if (entry->length < m_currentLength + 1) return Before; - UChar entryNextCharacter = entry->entity[m_currentLength]; + const LChar* entityString = HTMLEntityTable::entityString(*entry); + UChar entryNextCharacter = entityString[m_currentLength]; if (entryNextCharacter == nextCharacter) return Prefix; return entryNextCharacter < nextCharacter ? Before : After; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityTable.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityTable.h index f0d775efee7..1e465049b57 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityTable.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLEntityTable.h @@ -30,13 +30,14 @@ namespace WebCore { +// Member order to optimize packing. There will be thousands of these objects. struct HTMLEntityTableEntry { - UChar lastCharacter() const { return entity[length - 1]; } + LChar lastCharacter() const; - const UChar* entity; - int length; UChar32 firstValue; - UChar32 secondValue; + UChar secondValue; // UChar since double char sequences only use BMP chars. + short entityOffset; + short length; }; class HTMLEntityTable { @@ -46,6 +47,8 @@ public: static const HTMLEntityTableEntry* firstEntryStartingWith(UChar); static const HTMLEntityTableEntry* lastEntryStartingWith(UChar); + + static const LChar* entityString(const HTMLEntityTableEntry&); }; } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.cpp index 34215d75e26..26418debc1c 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.cpp @@ -79,7 +79,7 @@ HTMLFormattingElementList::Bookmark HTMLFormattingElementList::bookmarkFor(Eleme return Bookmark(&at(index)); } -void HTMLFormattingElementList::swapTo(Element* oldElement, PassRefPtr<HTMLStackItem> newItem, const Bookmark& bookmark) +void HTMLFormattingElementList::swapTo(Element* oldElement, PassRefPtrWillBeRawPtr<HTMLStackItem> newItem, const Bookmark& bookmark) { ASSERT(contains(oldElement)); ASSERT(!contains(newItem->element())); @@ -94,7 +94,7 @@ void HTMLFormattingElementList::swapTo(Element* oldElement, PassRefPtr<HTMLStack remove(oldElement); } -void HTMLFormattingElementList::append(PassRefPtr<HTMLStackItem> item) +void HTMLFormattingElementList::append(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { ensureNoahsArkCondition(item.get()); m_entries.append(item); @@ -123,7 +123,7 @@ void HTMLFormattingElementList::clearToLastMarker() } } -void HTMLFormattingElementList::tryToEnsureNoahsArkConditionQuickly(HTMLStackItem* newItem, Vector<HTMLStackItem*>& remainingCandidates) +void HTMLFormattingElementList::tryToEnsureNoahsArkConditionQuickly(HTMLStackItem* newItem, WillBeHeapVector<RawPtrWillBeMember<HTMLStackItem> >& remainingCandidates) { ASSERT(remainingCandidates.isEmpty()); @@ -132,7 +132,7 @@ void HTMLFormattingElementList::tryToEnsureNoahsArkConditionQuickly(HTMLStackIte // Use a vector with inline capacity to avoid a malloc in the common case // of a quickly ensuring the condition. - Vector<HTMLStackItem*, 10> candidates; + WillBeHeapVector<RawPtrWillBeMember<HTMLStackItem>, 10> candidates; size_t newItemAttributeCount = newItem->attributes().size(); @@ -155,19 +155,19 @@ void HTMLFormattingElementList::tryToEnsureNoahsArkConditionQuickly(HTMLStackIte if (candidates.size() < kNoahsArkCapacity) return; // There's room for the new element in the ark. There's no need to copy out the remainingCandidates. - remainingCandidates.append(candidates); + remainingCandidates.appendVector(candidates); } void HTMLFormattingElementList::ensureNoahsArkCondition(HTMLStackItem* newItem) { - Vector<HTMLStackItem*> candidates; + WillBeHeapVector<RawPtrWillBeMember<HTMLStackItem> > candidates; tryToEnsureNoahsArkConditionQuickly(newItem, candidates); if (candidates.isEmpty()) return; // We pre-allocate and re-use this second vector to save one malloc per // attribute that we verify. - Vector<HTMLStackItem*> remainingCandidates; + WillBeHeapVector<RawPtrWillBeMember<HTMLStackItem> > remainingCandidates; remainingCandidates.reserveInitialCapacity(candidates.size()); const Vector<Attribute>& attributes = newItem->attributes(); diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.h index 745dba1c771..cb7c0ce34fb 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLFormattingElementList.h @@ -38,6 +38,7 @@ class Element; // This may end up merged into HTMLElementStack. class HTMLFormattingElementList { WTF_MAKE_NONCOPYABLE(HTMLFormattingElementList); + DISALLOW_ALLOCATION(); public: HTMLFormattingElementList(); ~HTMLFormattingElementList(); @@ -46,22 +47,23 @@ public: // between the HTMLFormattingElementList and HTMLElementStack and needs // access to Entry::isMarker() and Entry::replaceElement() to do so. class Entry { + ALLOW_ONLY_INLINE_ALLOCATION(); public: // Inline because they're hot and Vector<T> uses them. - explicit Entry(PassRefPtr<HTMLStackItem> item) + explicit Entry(PassRefPtrWillBeRawPtr<HTMLStackItem> item) : m_item(item) { } enum MarkerEntryType { MarkerEntry }; explicit Entry(MarkerEntryType) - : m_item(0) + : m_item(nullptr) { } ~Entry() {} bool isMarker() const { return !m_item; } - PassRefPtr<HTMLStackItem> stackItem() const { return m_item; } + PassRefPtrWillBeRawPtr<HTMLStackItem> stackItem() const { return m_item; } Element* element() const { // The fact that !m_item == isMarker() is an implementation detail @@ -69,14 +71,16 @@ public: ASSERT(m_item); return m_item->element(); } - void replaceElement(PassRefPtr<HTMLStackItem> item) { m_item = item; } + void replaceElement(PassRefPtrWillBeRawPtr<HTMLStackItem> item) { m_item = item; } // Needed for use with Vector. These are super-hot and must be inline. bool operator==(Element* element) const { return !m_item ? !element : m_item->element() == element; } bool operator!=(Element* element) const { return !m_item ? !!element : m_item->element() != element; } + void trace(Visitor* visitor) { visitor->trace(m_item); } + private: - RefPtr<HTMLStackItem> m_item; + RefPtrWillBeMember<HTMLStackItem> m_item; }; class Bookmark { @@ -108,11 +112,11 @@ public: Entry* find(Element*); bool contains(Element*); - void append(PassRefPtr<HTMLStackItem>); + void append(PassRefPtrWillBeRawPtr<HTMLStackItem>); void remove(Element*); Bookmark bookmarkFor(Element*); - void swapTo(Element* oldElement, PassRefPtr<HTMLStackItem> newItem, const Bookmark&); + void swapTo(Element* oldElement, PassRefPtrWillBeRawPtr<HTMLStackItem> newItem, const Bookmark&); void appendMarker(); // clearToLastMarker also clears the marker (per the HTML5 spec). @@ -121,6 +125,8 @@ public: const Entry& at(size_t i) const { return m_entries[i]; } Entry& at(size_t i) { return m_entries[i]; } + void trace(Visitor* visitor) { visitor->trace(m_entries); } + #ifndef NDEBUG void show(); #endif @@ -130,12 +136,14 @@ private: // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#list-of-active-formatting-elements // These functions enforce the "Noah's Ark" condition, which removes redundant mis-nested elements. - void tryToEnsureNoahsArkConditionQuickly(HTMLStackItem*, Vector<HTMLStackItem*>& remainingCandiates); + void tryToEnsureNoahsArkConditionQuickly(HTMLStackItem*, WillBeHeapVector<RawPtrWillBeMember<HTMLStackItem> >& remainingCandiates); void ensureNoahsArkCondition(HTMLStackItem*); - Vector<Entry> m_entries; + WillBeHeapVector<Entry> m_entries; }; -} +} // namespace WebCore + +WTF_ALLOW_MOVE_AND_INIT_WITH_MEM_FUNCTIONS(WebCore::HTMLFormattingElementList::Entry); #endif // HTMLFormattingElementList_h diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.cpp index 038c8a1e5fb..5fa6d32e73d 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.cpp @@ -26,7 +26,7 @@ #include "config.h" #include "core/html/parser/HTMLMetaCharsetParser.h" -#include "HTMLNames.h" +#include "core/HTMLNames.h" #include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/HTMLParserOptions.h" #include "core/html/parser/HTMLTokenizer.h" @@ -51,61 +51,12 @@ HTMLMetaCharsetParser::~HTMLMetaCharsetParser() { } -static const char charsetString[] = "charset"; -static const size_t charsetLength = sizeof("charset") - 1; - -String HTMLMetaCharsetParser::extractCharset(const String& value) -{ - size_t pos = 0; - unsigned length = value.length(); - - while (pos < length) { - pos = value.find(charsetString, pos, false); - if (pos == kNotFound) - break; - - pos += charsetLength; - - // Skip whitespace. - while (pos < length && value[pos] <= ' ') - ++pos; - - if (value[pos] != '=') - continue; - - ++pos; - - while (pos < length && value[pos] <= ' ') - ++pos; - - char quoteMark = 0; - if (pos < length && (value[pos] == '"' || value[pos] == '\'')) { - quoteMark = static_cast<char>(value[pos++]); - ASSERT(!(quoteMark & 0x80)); - } - - if (pos == length) - break; - - unsigned end = pos; - while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';'))) - ++end; - - if (quoteMark && (end == length)) - break; // Close quote not found. - - return value.substring(pos, end - pos); - } - - return ""; -} - bool HTMLMetaCharsetParser::processMeta() { const HTMLToken::AttributeList& tokenAttributes = m_token.attributes(); - AttributeList attributes; + HTMLAttributeList attributes; for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) { - String attributeName = StringImpl::create8BitIfPossible(iter->name); + String attributeName = attemptStaticStringCreation(iter->name, Likely8Bit); String attributeValue = StringImpl::create8BitIfPossible(iter->value); attributes.append(std::make_pair(attributeName, attributeValue)); } @@ -114,37 +65,6 @@ bool HTMLMetaCharsetParser::processMeta() return m_encoding.isValid(); } -WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes) -{ - bool gotPragma = false; - Mode mode = None; - String charset; - - for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { - const AtomicString& attributeName = iter->first; - const String& attributeValue = iter->second; - - if (attributeName == http_equivAttr) { - if (equalIgnoringCase(attributeValue, "content-type")) - gotPragma = true; - } else if (charset.isEmpty()) { - if (attributeName == charsetAttr) { - charset = attributeValue; - mode = Charset; - } else if (attributeName == contentAttr) { - charset = extractCharset(attributeValue); - if (charset.length()) - mode = Pragma; - } - } - } - - if (mode == Charset || (mode == Pragma && gotPragma)) - return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); - - return WTF::TextEncoding(); -} - static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over. bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length) @@ -177,20 +97,20 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length) while (m_tokenizer->nextToken(m_input, m_token)) { bool end = m_token.type() == HTMLToken::EndTag; if (end || m_token.type() == HTMLToken::StartTag) { - AtomicString tagName(m_token.name()); + String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit); if (!end) { m_tokenizer->updateStateFor(tagName); - if (tagName == metaTag && processMeta()) { + if (threadSafeMatch(tagName, metaTag) && processMeta()) { m_doneChecking = true; return true; } } - if (tagName != scriptTag && tagName != noscriptTag - && tagName != styleTag && tagName != linkTag - && tagName != metaTag && tagName != objectTag - && tagName != titleTag && tagName != baseTag - && (end || tagName != htmlTag) && (end || tagName != headTag)) { + if (!threadSafeMatch(tagName, scriptTag) && !threadSafeMatch(tagName, noscriptTag) + && !threadSafeMatch(tagName, styleTag) && !threadSafeMatch(tagName, linkTag) + && !threadSafeMatch(tagName, metaTag) && !threadSafeMatch(tagName, objectTag) + && !threadSafeMatch(tagName, titleTag) && !threadSafeMatch(tagName, baseTag) + && (end || !threadSafeMatch(tagName, htmlTag)) && (end || !threadSafeMatch(tagName, headTag))) { m_inHeadSection = false; } } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.h index 3393fca40d2..65d9517a2e0 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLMetaCharsetParser.h @@ -48,22 +48,10 @@ public: const WTF::TextEncoding& encoding() { return m_encoding; } - typedef Vector<pair<String, String> > AttributeList; - // The returned encoding might not be valid. - static WTF::TextEncoding encodingFromMetaAttributes(const AttributeList& -); - private: HTMLMetaCharsetParser(); bool processMeta(); - static String extractCharset(const String&); - - enum Mode { - None, - Charset, - Pragma, - }; OwnPtr<HTMLTokenizer> m_tokenizer; OwnPtr<TextCodec> m_assumedCodec; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.cpp index f538c54cc5b..c0557a82281 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.cpp @@ -25,14 +25,18 @@ #include "config.h" #include "core/html/parser/HTMLParserIdioms.h" +#include "core/HTMLNames.h" #include <limits> #include "wtf/MathExtras.h" #include "wtf/text/AtomicString.h" #include "wtf/text/StringBuilder.h" #include "wtf/text/StringHash.h" +#include "wtf/text/TextEncoding.h" namespace WebCore { +using namespace HTMLNames; + template <typename CharType> static String stripLeadingAndTrailingHTMLSpaces(String string, const CharType* characters, unsigned length) { @@ -91,8 +95,7 @@ String serializeForNumberType(double number) Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue) { - // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType - + // http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers and parseToDoubleForNumberType // String::toDouble() accepts leading + and whitespace characters, which are not valid here. const UChar firstCharacter = string[0]; if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter)) @@ -102,11 +105,9 @@ Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbac if (!value.isFinite()) return fallbackValue; - // Numbers are considered finite IEEE 754 single-precision floating point values. - // See HTML5 2.5.4.3 `Real numbers.' - // FIXME: We should use numeric_limits<double>::max for number input type. - const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max()); - if (value < -floatMax || value > floatMax) + // Numbers are considered finite IEEE 754 Double-precision floating point values. + const Decimal doubleMax = Decimal::fromDouble(std::numeric_limits<double>::max()); + if (value < -doubleMax || value > doubleMax) return fallbackValue; // We return +0 for -0 case. @@ -115,8 +116,7 @@ Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbac double parseToDoubleForNumberType(const String& string, double fallbackValue) { - // See HTML5 2.5.4.3 `Real numbers.' - + // http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers // String::toDouble() accepts leading + and whitespace characters, which are not valid here. UChar firstCharacter = string[0]; if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter)) @@ -131,9 +131,8 @@ double parseToDoubleForNumberType(const String& string, double fallbackValue) if (!std::isfinite(value)) return fallbackValue; - // Numbers are considered finite IEEE 754 single-precision floating point values. - // See HTML5 2.5.4.3 `Real numbers.' - if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max()) + // Numbers are considered finite IEEE 754 Double-precision floating point values. + if (-std::numeric_limits<double>::max() > value || value > std::numeric_limits<double>::max()) return fallbackValue; // The following expression converts -0 to +0. @@ -265,6 +264,92 @@ bool parseHTMLNonNegativeInteger(const String& input, unsigned& value) return parseHTMLNonNegativeIntegerInternal(start, start + length, value); } +static const char charsetString[] = "charset"; +static const size_t charsetLength = sizeof("charset") - 1; + +String extractCharset(const String& value) +{ + size_t pos = 0; + unsigned length = value.length(); + + while (pos < length) { + pos = value.find(charsetString, pos, false); + if (pos == kNotFound) + break; + + pos += charsetLength; + + // Skip whitespace. + while (pos < length && value[pos] <= ' ') + ++pos; + + if (value[pos] != '=') + continue; + + ++pos; + + while (pos < length && value[pos] <= ' ') + ++pos; + + char quoteMark = 0; + if (pos < length && (value[pos] == '"' || value[pos] == '\'')) { + quoteMark = static_cast<char>(value[pos++]); + ASSERT(!(quoteMark & 0x80)); + } + + if (pos == length) + break; + + unsigned end = pos; + while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';'))) + ++end; + + if (quoteMark && (end == length)) + break; // Close quote not found. + + return value.substring(pos, end - pos); + } + + return ""; +} + +enum Mode { + None, + Charset, + Pragma, +}; + +WTF::TextEncoding encodingFromMetaAttributes(const HTMLAttributeList& attributes) +{ + bool gotPragma = false; + Mode mode = None; + String charset; + + for (HTMLAttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { + const String& attributeName = iter->first; + const String& attributeValue = AtomicString(iter->second); + + if (threadSafeMatch(attributeName, http_equivAttr)) { + if (equalIgnoringCase(attributeValue, "content-type")) + gotPragma = true; + } else if (charset.isEmpty()) { + if (threadSafeMatch(attributeName, charsetAttr)) { + charset = attributeValue; + mode = Charset; + } else if (threadSafeMatch(attributeName, contentAttr)) { + charset = extractCharset(attributeValue); + if (charset.length()) + mode = Pragma; + } + } + } + + if (mode == Charset || (mode == Pragma && gotPragma)) + return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); + + return WTF::TextEncoding(); +} + static bool threadSafeEqual(const StringImpl* a, const StringImpl* b) { if (a == b) @@ -284,7 +369,8 @@ bool threadSafeMatch(const String& localName, const QualifiedName& qName) return threadSafeEqual(localName.impl(), qName.localName().impl()); } -StringImpl* findStringIfStatic(const UChar* characters, unsigned length) +template<typename CharType> +inline StringImpl* findStringIfStatic(const CharType* characters, unsigned length) { // We don't need to try hashing if we know the string is too long. if (length > StringImpl::highestStaticStringLength()) @@ -306,4 +392,27 @@ StringImpl* findStringIfStatic(const UChar* characters, unsigned length) return it->value; } +String attemptStaticStringCreation(const LChar* characters, size_t size) +{ + String string(findStringIfStatic(characters, size)); + if (string.impl()) + return string; + return String(characters, size); +} + +String attemptStaticStringCreation(const UChar* characters, size_t size, CharacterWidth width) +{ + String string(findStringIfStatic(characters, size)); + if (string.impl()) + return string; + if (width == Likely8Bit) + string = StringImpl::create8BitIfPossible(characters, size); + else if (width == Force8Bit) + string = String::make8BitFrom16BitSource(characters, size); + else + string = String(characters, size); + + return string; +} + } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.h index 16fd3eebe3c..d5ab2899638 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserIdioms.h @@ -30,12 +30,11 @@ #include "wtf/Forward.h" #include "wtf/text/WTFString.h" -namespace WebCore { +namespace WTF { +class TextEncoding; +} -// Space characters as defined by the HTML specification. -bool isHTMLSpace(UChar); -bool isHTMLLineBreak(UChar); -bool isNotHTMLSpace(UChar); +namespace WebCore { // Strip leading and trailing whitespace as defined by the HTML specification. String stripLeadingAndTrailingHTMLSpaces(const String&); @@ -59,10 +58,13 @@ double parseToDoubleForNumberType(const String&, double fallbackValue = std::num bool parseHTMLInteger(const String&, int&); // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers -bool parseHTMLNonNegativeInteger(const String&, unsigned int&); +bool parseHTMLNonNegativeInteger(const String&, unsigned&); -// Inline implementations of some of the functions declared above. +typedef Vector<pair<String, String> > HTMLAttributeList; +// The returned encoding might not be valid. +WTF::TextEncoding encodingFromMetaAttributes(const HTMLAttributeList&); +// Space characters as defined by the HTML specification. template<typename CharType> inline bool isHTMLSpace(CharType character) { @@ -80,9 +82,15 @@ inline bool isHTMLSpace(CharType character) } template<typename CharType> +inline bool isComma(CharType character) +{ + return character == ','; +} + +template<typename CharType> inline bool isHTMLSpaceOrComma(CharType character) { - return isHTMLSpace<CharType>(character) || character == ','; + return isComma(character) || isHTMLSpace(character); } inline bool isHTMLLineBreak(UChar character) @@ -99,29 +107,29 @@ inline bool isNotHTMLSpace(CharType character) bool threadSafeMatch(const QualifiedName&, const QualifiedName&); bool threadSafeMatch(const String&, const QualifiedName&); -StringImpl* findStringIfStatic(const UChar* characters, unsigned length); - enum CharacterWidth { Likely8Bit, Force8Bit, Force16Bit }; +String attemptStaticStringCreation(const LChar*, size_t); + +String attemptStaticStringCreation(const UChar*, size_t, CharacterWidth); + template<size_t inlineCapacity> -static String attemptStaticStringCreation(const Vector<UChar, inlineCapacity>& vector, CharacterWidth width) +inline static String attemptStaticStringCreation(const Vector<UChar, inlineCapacity>& vector, CharacterWidth width) { - String string(findStringIfStatic(vector.data(), vector.size())); - if (string.impl()) - return string; - if (width == Likely8Bit) - string = StringImpl::create8BitIfPossible(vector); - else if (width == Force8Bit) - string = String::make8BitFrom16BitSource(vector); - else - string = String(vector); - - return string; + return attemptStaticStringCreation(vector.data(), vector.size(), width); } +inline static String attemptStaticStringCreation(const String str) +{ + if (!str.is8Bit()) + return attemptStaticStringCreation(str.characters16(), str.length(), Force16Bit); + return attemptStaticStringCreation(str.characters8(), str.length()); +} + + } #endif diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserOptions.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserOptions.cpp index 98ceb68461d..b08cf92994e 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserOptions.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserOptions.cpp @@ -28,23 +28,28 @@ #include "bindings/v8/ScriptController.h" #include "core/dom/Document.h" -#include "core/loader/FrameLoader.h" -#include "core/frame/Frame.h" +#include "core/frame/LocalFrame.h" #include "core/frame/Settings.h" +#include "core/loader/FrameLoader.h" namespace WebCore { HTMLParserOptions::HTMLParserOptions(Document* document) { - Frame* frame = document ? document->frame() : 0; + LocalFrame* frame = document ? document->frame() : 0; scriptEnabled = frame && frame->script().canExecuteScripts(NotAboutToExecuteScript); pluginsEnabled = frame && frame->loader().allowPlugins(NotAboutToInstantiatePlugin); - Settings* settings = document ? document->settings() : 0; - // We force the main-thread parser for about:blank, javascript: and data: urls for compatibility - // with historical synchronous loading/parsing behavior of those schemes. - useThreading = settings && settings->threadedHTMLParser() && !document->url().isBlankURL() - && (settings->useThreadedHTMLParserForDataURLs() || !document->url().protocolIsData()); + // We force the main-thread parser for two cases: + // - about:blank and javascript (which uses about:blank) for compatibility + // with historical synchronous loading/parsing behavior. + // - instances where the Document has no Frame (this happens sometimes for + // HTML imports, and possibly other cases). + // FIXME: We want to use the threaded parser for XHRs (where there is no + // frame) so the second case should go away eventually. + // FIXME: Gecko does not load javascript: urls synchronously, why do we? + // See LayoutTests/loader/iframe-sync-loads.html + useThreading = document && document->frame() && !document->url().isAboutBlankURL(); } } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.cpp index c74628479df..61cb172ccae 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.cpp @@ -88,34 +88,14 @@ HTMLParserScheduler::~HTMLParserScheduler() void HTMLParserScheduler::continueNextChunkTimerFired(Timer<HTMLParserScheduler>* timer) { ASSERT_UNUSED(timer, timer == &m_continueNextChunkTimer); - // FIXME: The timer class should handle timer priorities instead of this code. - // If a layout is scheduled, wait again to let the layout timer run first. - // FIXME: We should fix this by reducing the max-parse-time instead of - // artificially forcing the parser to yield agressively before first layout. - if (m_parser->document()->shouldParserYieldAgressivelyBeforeScriptExecution()) { - m_continueNextChunkTimer.startOneShot(0); - return; - } m_parser->resumeParsingAfterYield(); } -void HTMLParserScheduler::checkForYieldBeforeScript(PumpSession& session) -{ - // If we've never painted before and a layout is pending, yield prior to running - // scripts to give the page a chance to paint earlier. - Document* document = m_parser->document(); - bool needsFirstPaint = document->view() && !document->view()->hasEverPainted(); - if (needsFirstPaint && document->shouldParserYieldAgressivelyBeforeScriptExecution()) - session.needsYield = true; - session.didSeeScript = true; -} - void HTMLParserScheduler::scheduleForResume() { - m_continueNextChunkTimer.startOneShot(0); + m_continueNextChunkTimer.startOneShot(0, FROM_HERE); } - void HTMLParserScheduler::suspend() { ASSERT(!m_isSuspendedWithActiveTimer); @@ -131,7 +111,7 @@ void HTMLParserScheduler::resume() if (!m_isSuspendedWithActiveTimer) return; m_isSuspendedWithActiveTimer = false; - m_continueNextChunkTimer.startOneShot(0); + m_continueNextChunkTimer.startOneShot(0, FROM_HERE); } } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.h index e8bfe493085..4e3364edfbf 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserScheduler.h @@ -38,15 +38,17 @@ class Document; class HTMLDocumentParser; class ActiveParserSession { + STACK_ALLOCATED(); public: explicit ActiveParserSession(Document*); ~ActiveParserSession(); private: - RefPtr<Document> m_document; + RefPtrWillBeMember<Document> m_document; }; class PumpSession : public NestingLevelIncrementer, public ActiveParserSession { + STACK_ALLOCATED(); public: PumpSession(unsigned& nestingLevel, Document*); ~PumpSession(); @@ -84,7 +86,6 @@ public: } ++session.processedTokens; } - void checkForYieldBeforeScript(PumpSession&); void scheduleForResume(); bool isScheduledForResume() const { return m_isSuspendedWithActiveTimer || m_continueNextChunkTimer.isActive(); } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.cpp index 5a0e30c3e67..5b5c6939714 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.cpp @@ -32,13 +32,15 @@ #include "core/html/parser/HTMLParserThread.h" #include "platform/Task.h" +#include "platform/TaskSynchronizer.h" #include "public/platform/Platform.h" #include "wtf/PassOwnPtr.h" namespace WebCore { +static HTMLParserThread* s_sharedThread = 0; + HTMLParserThread::HTMLParserThread() - : m_thread(adoptPtr(blink::Platform::current()->createThread("HTMLParserThread"))) { } @@ -46,17 +48,66 @@ HTMLParserThread::~HTMLParserThread() { } +void HTMLParserThread::init() +{ + ASSERT(!s_sharedThread); + s_sharedThread = new HTMLParserThread; +} + +void HTMLParserThread::setupHTMLParserThread() +{ + m_pendingGCRunner = adoptPtr(new PendingGCRunner); + m_messageLoopInterruptor = adoptPtr(new MessageLoopInterruptor(&platformThread())); + platformThread().addTaskObserver(m_pendingGCRunner.get()); + ThreadState::attach(); + ThreadState::current()->addInterruptor(m_messageLoopInterruptor.get()); +} + +void HTMLParserThread::shutdown() +{ + ASSERT(s_sharedThread); + // currentThread will always be non-null in production, but can be null in Chromium unit tests. + if (blink::Platform::current()->currentThread() && s_sharedThread->isRunning()) { + TaskSynchronizer taskSynchronizer; + s_sharedThread->postTask(WTF::bind(&HTMLParserThread::cleanupHTMLParserThread, s_sharedThread, &taskSynchronizer)); + taskSynchronizer.waitForTaskCompletion(); + } + delete s_sharedThread; + s_sharedThread = 0; +} + +void HTMLParserThread::cleanupHTMLParserThread(TaskSynchronizer* taskSynchronizer) +{ + ThreadState::current()->removeInterruptor(m_messageLoopInterruptor.get()); + ThreadState::detach(); + platformThread().removeTaskObserver(m_pendingGCRunner.get()); + m_pendingGCRunner = nullptr; + m_messageLoopInterruptor = nullptr; + taskSynchronizer->taskCompleted(); +} + HTMLParserThread* HTMLParserThread::shared() { - static HTMLParserThread* thread; - if (!thread) - thread = new HTMLParserThread; - return thread; + return s_sharedThread; +} + +blink::WebThread& HTMLParserThread::platformThread() +{ + if (!isRunning()) { + m_thread = adoptPtr(blink::Platform::current()->createThread("HTMLParserThread")); + postTask(WTF::bind(&HTMLParserThread::setupHTMLParserThread, this)); + } + return *m_thread; +} + +bool HTMLParserThread::isRunning() +{ + return !!m_thread; } void HTMLParserThread::postTask(const Closure& closure) { - m_thread->postTask(new Task(closure)); + platformThread().postTask(new Task(closure)); } } // namespace WebCore diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.h index e0b85f9399c..42630f39cfa 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThread.h @@ -31,22 +31,37 @@ #ifndef HTMLParserThread_h #define HTMLParserThread_h +#include "platform/heap/glue/MessageLoopInterruptor.h" +#include "platform/heap/glue/PendingGCRunner.h" +#include "public/platform/WebThread.h" #include "wtf/Functional.h" #include "wtf/OwnPtr.h" -#include "public/platform/WebThread.h" namespace WebCore { +class TaskSynchronizer; + class HTMLParserThread { public: + static void init(); + static void shutdown(); + + // It is an error to call shared() before init() or after shutdown(); static HTMLParserThread* shared(); + void postTask(const Closure&); + blink::WebThread& platformThread(); + bool isRunning(); private: HTMLParserThread(); ~HTMLParserThread(); + void setupHTMLParserThread(); + void cleanupHTMLParserThread(TaskSynchronizer*); OwnPtr<blink::WebThread> m_thread; + OwnPtr<PendingGCRunner> m_pendingGCRunner; + OwnPtr<MessageLoopInterruptor> m_messageLoopInterruptor; }; } // namespace WebCore diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThreadTest.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThreadTest.cpp new file mode 100644 index 00000000000..8b742b86dc1 --- /dev/null +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLParserThreadTest.cpp @@ -0,0 +1,26 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "config.h" +#include "core/html/parser/HTMLParserThread.h" + +#include <gtest/gtest.h> + +namespace { + +using namespace WebCore; + +TEST(HTMLParserThread, Init) +{ + // The harness has already run init() for us, so tear down the parser first. + ASSERT_TRUE(HTMLParserThread::shared()); + HTMLParserThread::shutdown(); + + // Make sure starting the parser thread brings it back to life. + ASSERT_FALSE(HTMLParserThread::shared()); + HTMLParserThread::init(); + ASSERT_TRUE(HTMLParserThread::shared()); +} + +} // namespace diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.cpp index 0d1e0645ad7..6bea3f676a1 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.cpp @@ -28,13 +28,17 @@ #include "config.h" #include "core/html/parser/HTMLPreloadScanner.h" -#include "HTMLNames.h" -#include "InputTypeNames.h" -#include "RuntimeEnabledFeatures.h" +#include "core/HTMLNames.h" +#include "core/InputTypeNames.h" +#include "core/css/MediaList.h" +#include "core/css/MediaQueryEvaluator.h" +#include "core/css/MediaValues.h" +#include "core/css/parser/SizesAttributeParser.h" #include "core/html/LinkRelAttribute.h" #include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/HTMLSrcsetParser.h" #include "core/html/parser/HTMLTokenizer.h" +#include "platform/RuntimeEnabledFeatures.h" #include "platform/TraceEvent.h" #include "wtf/MainThread.h" @@ -90,19 +94,33 @@ static String initiatorFor(const StringImpl* tagImpl) return emptyString(); } +static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue) +{ + RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue); + MediaQueryEvaluator mediaQueryEvaluator("screen", mediaValues); + return mediaQueryEvaluator.eval(mediaQueries.get()); +} + class TokenPreloadScanner::StartTagScanner { public: - StartTagScanner(const StringImpl* tagImpl, float deviceScaleFactor) + StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues) : m_tagImpl(tagImpl) , m_linkIsStyleSheet(false) + , m_matchedMediaAttribute(true) , m_inputIsImage(false) - , m_deviceScaleFactor(deviceScaleFactor) - , m_encounteredImgSrc(false) + , m_sourceSize(0) + , m_sourceSizeSet(false) , m_isCORSEnabled(false) , m_allowCredentials(DoNotAllowStoredCredentials) + , m_mediaValues(mediaValues) { - if (!match(m_tagImpl, imgTag) - && !match(m_tagImpl, inputTag) + if (match(m_tagImpl, imgTag) + || match(m_tagImpl, sourceTag)) { + if (RuntimeEnabledFeatures::pictureSizesEnabled()) + m_sourceSize = SizesAttributeParser::findEffectiveSize(String(), m_mediaValues); + return; + } + if ( !match(m_tagImpl, inputTag) && !match(m_tagImpl, linkTag) && !match(m_tagImpl, scriptTag)) m_tagImpl = 0; @@ -133,57 +151,122 @@ public: processAttribute(iter->name, iter->value); } + void handlePictureSourceURL(String& sourceURL) + { + if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty()) + sourceURL = m_srcsetImageCandidate.toString(); + else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty()) + setUrlToLoad(sourceURL, AllowURLReplacement); + } + PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source) { - if (!shouldPreload()) + if (!shouldPreload() || !m_matchedMediaAttribute) return nullptr; TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii()); TextPosition position = TextPosition(source.currentLine(), source.currentColumn()); - OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute); + OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType()); if (isCORSEnabled()) - request->setCrossOriginEnabled(allowCredentials()); + request->setCrossOriginEnabled(allowStoredCredentials()); request->setCharset(charset()); return request.release(); } private: template<typename NameType> + void processScriptAttribute(const NameType& attributeName, const String& attributeValue) + { + // FIXME - Don't set crossorigin multiple times. + if (match(attributeName, srcAttr)) + setUrlToLoad(attributeValue, DisallowURLReplacement); + else if (match(attributeName, crossoriginAttr)) + setCrossOriginAllowed(attributeValue); + } + + template<typename NameType> + void processImgAttribute(const NameType& attributeName, const String& attributeValue) + { + if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) { + m_imgSrcUrl = attributeValue; + setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement); + } else if (match(attributeName, crossoriginAttr)) { + setCrossOriginAllowed(attributeValue); + } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { + m_srcsetAttributeValue = attributeValue; + m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); + setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); + } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) { + m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues); + m_sourceSizeSet = true; + if (!m_srcsetImageCandidate.isEmpty()) { + m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); + setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); + } + } + } + + template<typename NameType> + void processLinkAttribute(const NameType& attributeName, const String& attributeValue) + { + // FIXME - Don't set rel/media/crossorigin multiple times. + if (match(attributeName, hrefAttr)) + setUrlToLoad(attributeValue, DisallowURLReplacement); + else if (match(attributeName, relAttr)) + m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); + else if (match(attributeName, mediaAttr)) + m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); + else if (match(attributeName, crossoriginAttr)) + setCrossOriginAllowed(attributeValue); + } + + template<typename NameType> + void processInputAttribute(const NameType& attributeName, const String& attributeValue) + { + // FIXME - Don't set type multiple times. + if (match(attributeName, srcAttr)) + setUrlToLoad(attributeValue, DisallowURLReplacement); + else if (match(attributeName, typeAttr)) + m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image); + } + + template<typename NameType> + void processSourceAttribute(const NameType& attributeName, const String& attributeValue) + { + if (!RuntimeEnabledFeatures::pictureEnabled()) + return; + if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { + m_srcsetAttributeValue = attributeValue; + m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); + } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) { + m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues); + m_sourceSizeSet = true; + if (!m_srcsetImageCandidate.isEmpty()) { + m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); + } + } else if (match(attributeName, mediaAttr)) { + // FIXME - Don't match media multiple times. + m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); + } + + } + + template<typename NameType> void processAttribute(const NameType& attributeName, const String& attributeValue) { if (match(attributeName, charsetAttr)) m_charset = attributeValue; - if (match(m_tagImpl, scriptTag)) { - if (match(attributeName, srcAttr)) - setUrlToLoad(attributeValue, DisallowURLReplacement); - else if (match(attributeName, crossoriginAttr)) - setCrossOriginAllowed(attributeValue); - } else if (match(m_tagImpl, imgTag)) { - if (match(attributeName, srcAttr) && !m_encounteredImgSrc) { - m_encounteredImgSrc = true; - setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, attributeValue, m_srcsetImageCandidate), AllowURLReplacement); - } else if (match(attributeName, crossoriginAttr)) { - setCrossOriginAllowed(attributeValue); - } else if (RuntimeEnabledFeatures::srcsetEnabled() - && match(attributeName, srcsetAttr) - && m_srcsetImageCandidate.isEmpty()) { - m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_deviceScaleFactor, attributeValue); - setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcsetImageCandidate), AllowURLReplacement); - } - } else if (match(m_tagImpl, linkTag)) { - if (match(attributeName, hrefAttr)) - setUrlToLoad(attributeValue, DisallowURLReplacement); - else if (match(attributeName, relAttr)) - m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); - else if (match(attributeName, mediaAttr)) - m_mediaAttribute = attributeValue; - } else if (match(m_tagImpl, inputTag)) { - if (match(attributeName, srcAttr)) - setUrlToLoad(attributeValue, DisallowURLReplacement); - else if (match(attributeName, typeAttr)) - m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image); - } + if (match(m_tagImpl, scriptTag)) + processScriptAttribute(attributeName, attributeValue); + else if (match(m_tagImpl, imgTag)) + processImgAttribute(attributeName, attributeValue); + else if (match(m_tagImpl, linkTag)) + processLinkAttribute(attributeName, attributeValue); + else if (match(m_tagImpl, inputTag)) + processInputAttribute(attributeName, attributeValue); + else if (match(m_tagImpl, sourceTag)) + processSourceAttribute(attributeName, attributeValue); } static bool relAttributeIsStyleSheet(const String& attributeValue) @@ -240,7 +323,7 @@ private: return m_isCORSEnabled; } - StoredCredentials allowCredentials() const + StoredCredentials allowStoredCredentials() const { return m_allowCredentials; } @@ -259,19 +342,23 @@ private: ImageCandidate m_srcsetImageCandidate; String m_charset; bool m_linkIsStyleSheet; - String m_mediaAttribute; + bool m_matchedMediaAttribute; bool m_inputIsImage; - float m_deviceScaleFactor; - bool m_encounteredImgSrc; + String m_imgSrcUrl; + String m_srcsetAttributeValue; + unsigned m_sourceSize; + bool m_sourceSizeSet; bool m_isCORSEnabled; StoredCredentials m_allowCredentials; + RefPtr<MediaValues> m_mediaValues; }; -TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, float deviceScaleFactor) +TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) : m_documentURL(documentURL) , m_inStyle(false) - , m_deviceScaleFactor(deviceScaleFactor) + , m_inPicture(false) , m_templateCount(0) + , m_mediaValues(mediaValues) { } @@ -328,7 +415,10 @@ void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& if (m_inStyle) m_cssScanner.reset(); m_inStyle = false; + return; } + if (match(tagImpl, pictureTag)) + m_inPicture = false; return; } case HTMLToken::StartTag: { @@ -350,9 +440,16 @@ void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& updatePredictedBaseURL(token); return; } + if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) { + m_inPicture = true; + m_pictureSourceURL = String(); + return; + } - StartTagScanner scanner(tagImpl, m_deviceScaleFactor); + StartTagScanner scanner(tagImpl, m_mediaValues); scanner.processAttributes(token.attributes()); + if (m_inPicture) + scanner.handlePictureSourceURL(m_pictureSourceURL); OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source); if (request) requests.append(request.release()); @@ -372,8 +469,8 @@ void TokenPreloadScanner::updatePredictedBaseURL(const Token& token) m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy(); } -HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, float deviceScaleFactor) - : m_scanner(documentURL, deviceScaleFactor) +HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) + : m_scanner(documentURL, mediaValues) , m_tokenizer(HTMLTokenizer::create(options)) { } @@ -391,6 +488,8 @@ void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& star { ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. + TRACE_EVENT1("webkit", "HTMLPreloadScanner::scan", "source_length", m_source.length()); + // When we start scanning, our best prediction of the baseElementURL is the real one! if (!startingBaseElementURL.isEmpty()) m_scanner.setPredictedBaseElementURL(startingBaseElementURL); @@ -399,7 +498,7 @@ void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& star while (m_tokenizer->nextToken(m_source, m_token)) { if (m_token.type() == HTMLToken::StartTag) - m_tokenizer->updateStateFor(AtomicString(m_token.name())); + m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit)); m_scanner.scan(m_token, m_source, requests); m_token.clear(); } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.h index 956d30b3d28..96046e1259d 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLPreloadScanner.h @@ -40,11 +40,12 @@ typedef size_t TokenPreloadScannerCheckpoint; class HTMLParserOptions; class HTMLTokenizer; class SegmentedString; +class MediaValues; class TokenPreloadScanner { WTF_MAKE_NONCOPYABLE(TokenPreloadScanner); WTF_MAKE_FAST_ALLOCATED; public: - TokenPreloadScanner(const KURL& documentURL, float deviceScaleFactor); + TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues>); ~TokenPreloadScanner(); void scan(const HTMLToken&, const SegmentedString&, PreloadRequestStream& requests); @@ -89,8 +90,10 @@ private: const KURL m_documentURL; KURL m_predictedBaseElementURL; bool m_inStyle; - float m_deviceScaleFactor; + bool m_inPicture; + String m_pictureSourceURL; size_t m_templateCount; + RefPtr<MediaValues> m_mediaValues; Vector<Checkpoint> m_checkpoints; }; @@ -98,7 +101,7 @@ private: class HTMLPreloadScanner { WTF_MAKE_NONCOPYABLE(HTMLPreloadScanner); WTF_MAKE_FAST_ALLOCATED; public: - HTMLPreloadScanner(const HTMLParserOptions&, const KURL& documentURL, float deviceScaleFactor); + HTMLPreloadScanner(const HTMLParserOptions&, const KURL& documentURL, PassRefPtr<MediaValues>); ~HTMLPreloadScanner(); void appendToEnd(const SegmentedString&); diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.cpp index 08e70a6a278..76857627141 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.cpp @@ -29,9 +29,7 @@ #include "core/dom/Document.h" #include "core/fetch/FetchInitiatorInfo.h" #include "core/fetch/ResourceFetcher.h" -#include "core/html/HTMLImport.h" -#include "core/css/MediaList.h" -#include "core/css/MediaQueryEvaluator.h" +#include "core/html/imports/HTMLImport.h" #include "core/rendering/RenderObject.h" #include "public/platform/Platform.h" @@ -42,20 +40,19 @@ bool PreloadRequest::isSafeToSendToAnotherThread() const return m_initiatorName.isSafeToSendToAnotherThread() && m_charset.isSafeToSendToAnotherThread() && m_resourceURL.isSafeToSendToAnotherThread() - && m_mediaAttribute.isSafeToSendToAnotherThread() && m_baseURL.isSafeToSendToAnotherThread(); } KURL PreloadRequest::completeURL(Document* document) { - return document->completeURL(m_resourceURL, m_baseURL.isEmpty() ? document->url() : m_baseURL); + return document->completeURLWithOverride(m_resourceURL, m_baseURL.isEmpty() ? document->url() : m_baseURL); } FetchRequest PreloadRequest::resourceRequest(Document* document) { ASSERT(isMainThread()); FetchInitiatorInfo initiatorInfo; - initiatorInfo.name = m_initiatorName; + initiatorInfo.name = AtomicString(m_initiatorName); initiatorInfo.position = m_initiatorPosition; FetchRequest request(ResourceRequest(completeURL(document)), initiatorInfo); @@ -73,28 +70,11 @@ void HTMLResourcePreloader::takeAndPreload(PreloadRequestStream& r) preload(it->release()); } -static bool mediaAttributeMatches(Frame* frame, RenderStyle* renderStyle, const String& attributeValue) -{ - RefPtr<MediaQuerySet> mediaQueries = MediaQuerySet::create(attributeValue); - MediaQueryEvaluator mediaQueryEvaluator("screen", frame, renderStyle); - return mediaQueryEvaluator.eval(mediaQueries.get()); -} - void HTMLResourcePreloader::preload(PassOwnPtr<PreloadRequest> preload) { - Document* executingDocument = m_document->import() ? m_document->import()->master() : m_document; - Document* loadingDocument = m_document; - - ASSERT(executingDocument->frame()); - ASSERT(executingDocument->renderer()); - ASSERT(executingDocument->renderer()->style()); - if (!preload->media().isEmpty() && !mediaAttributeMatches(executingDocument->frame(), executingDocument->renderer()->style(), preload->media())) - return; - FetchRequest request = preload->resourceRequest(m_document); blink::Platform::current()->histogramCustomCounts("WebCore.PreloadDelayMs", static_cast<int>(1000 * (monotonicallyIncreasingTime() - preload->discoveryTime())), 0, 2000, 20); - loadingDocument->fetcher()->preload(preload->resourceType(), request, preload->charset()); + m_document->fetcher()->preload(preload->resourceType(), request, preload->charset()); } - } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.h index 48686b6ca73..ee6868c143c 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLResourcePreloader.h @@ -35,14 +35,9 @@ namespace WebCore { class PreloadRequest { public: - static PassOwnPtr<PreloadRequest> create(const String& initiatorName, const TextPosition& initiatorPosition, const String& resourceURL, const KURL& baseURL, Resource::Type resourceType, const String& mediaAttribute) - { - return adoptPtr(new PreloadRequest(initiatorName, initiatorPosition, resourceURL, baseURL, resourceType, mediaAttribute)); - } - static PassOwnPtr<PreloadRequest> create(const String& initiatorName, const TextPosition& initiatorPosition, const String& resourceURL, const KURL& baseURL, Resource::Type resourceType) { - return adoptPtr(new PreloadRequest(initiatorName, initiatorPosition, resourceURL, baseURL, resourceType, "")); + return adoptPtr(new PreloadRequest(initiatorName, initiatorPosition, resourceURL, baseURL, resourceType)); } bool isSafeToSendToAnotherThread() const; @@ -50,7 +45,6 @@ public: FetchRequest resourceRequest(Document*); const String& charset() const { return m_charset; } - const String& media() const { return m_mediaAttribute; } double discoveryTime() const { return m_discoveryTime; } void setCharset(const String& charset) { m_charset = charset.isolatedCopy(); } void setCrossOriginEnabled(StoredCredentials allowCredentials) @@ -62,13 +56,12 @@ public: Resource::Type resourceType() const { return m_resourceType; } private: - PreloadRequest(const String& initiatorName, const TextPosition& initiatorPosition, const String& resourceURL, const KURL& baseURL, Resource::Type resourceType, const String& mediaAttribute) + PreloadRequest(const String& initiatorName, const TextPosition& initiatorPosition, const String& resourceURL, const KURL& baseURL, Resource::Type resourceType) : m_initiatorName(initiatorName) , m_initiatorPosition(initiatorPosition) , m_resourceURL(resourceURL.isolatedCopy()) , m_baseURL(baseURL.copy()) , m_resourceType(resourceType) - , m_mediaAttribute(mediaAttribute.isolatedCopy()) , m_isCORSEnabled(false) , m_allowCredentials(DoNotAllowStoredCredentials) , m_discoveryTime(monotonicallyIncreasingTime()) @@ -83,7 +76,6 @@ private: KURL m_baseURL; String m_charset; Resource::Type m_resourceType; - String m_mediaAttribute; bool m_isCORSEnabled; StoredCredentials m_allowCredentials; double m_discoveryTime; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.cpp index 1cb6555c243..70c4c25f3b5 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.cpp @@ -33,10 +33,10 @@ #include "core/dom/Microtask.h" #include "core/dom/ScriptLoader.h" #include "core/fetch/ScriptResource.h" +#include "core/frame/LocalFrame.h" #include "core/html/parser/HTMLInputStream.h" #include "core/html/parser/HTMLScriptRunnerHost.h" #include "core/html/parser/NestingLevelIncrementer.h" -#include "core/frame/Frame.h" #include "platform/NotImplemented.h" namespace WebCore { @@ -54,7 +54,23 @@ HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* hos HTMLScriptRunner::~HTMLScriptRunner() { - // FIXME: Should we be passed a "done loading/parsing" callback sooner than destruction? +#if ENABLE(OILPAN) + // If the document is destructed without having explicitly + // detached the parser (and this script runner object), perform + // detach steps now. This will happen if the Document, the parser + // and this script runner object are swept out in the same GC. + detach(); +#else + // Verify that detach() has been called. + ASSERT(!m_document); +#endif +} + +void HTMLScriptRunner::detach() +{ + if (!m_document) + return; + if (m_parserBlockingScript.resource() && m_parserBlockingScript.watchingForLoad()) stopWatchingForLoad(m_parserBlockingScript); @@ -63,23 +79,25 @@ HTMLScriptRunner::~HTMLScriptRunner() if (pendingScript.resource() && pendingScript.watchingForLoad()) stopWatchingForLoad(pendingScript); } -} - -void HTMLScriptRunner::detach() -{ - m_document = 0; + m_document = nullptr; } static KURL documentURLForScriptExecution(Document* document) { - if (!document || !document->frame()) + if (!document) + return KURL(); + + if (!document->frame()) { + if (document->importsController()) + return document->url(); return KURL(); + } // Use the URL of the currently active document for this frame. return document->frame()->document()->url(); } -inline PassRefPtr<Event> createScriptLoadEvent() +inline PassRefPtrWillBeRawPtr<Event> createScriptLoadEvent() { return Event::create(EventTypeNames::load); } @@ -97,7 +115,7 @@ ScriptSourceCode HTMLScriptRunner::sourceFromPendingScript(const PendingScript& bool HTMLScriptRunner::isPendingScriptReady(const PendingScript& script) { - m_hasScriptsWaitingForResources = !m_document->haveStylesheetsAndImportsLoaded(); + m_hasScriptsWaitingForResources = !m_document->isScriptExecutionReady(); if (m_hasScriptsWaitingForResources) return false; if (script.resource() && !script.resource()->isLoaded()) @@ -109,14 +127,14 @@ void HTMLScriptRunner::executeParsingBlockingScript() { ASSERT(m_document); ASSERT(!isExecutingScript()); - ASSERT(m_document->haveStylesheetsAndImportsLoaded()); + ASSERT(m_document->isScriptExecutionReady()); ASSERT(isPendingScriptReady(m_parserBlockingScript)); InsertionPointRecord insertionPointRecord(m_host->inputStream()); - executePendingScriptAndDispatchEvent(m_parserBlockingScript); + executePendingScriptAndDispatchEvent(m_parserBlockingScript, PendingScriptBlockingParser); } -void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendingScript) +void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendingScript, PendingScriptType pendingScriptType) { bool errorOccurred = false; ScriptSourceCode sourceCode = sourceFromPendingScript(pendingScript, errorOccurred); @@ -125,11 +143,18 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi if (pendingScript.resource() && pendingScript.watchingForLoad()) stopWatchingForLoad(pendingScript); - if (!isExecutingScript()) + if (!isExecutingScript()) { Microtask::performCheckpoint(); + if (pendingScriptType == PendingScriptBlockingParser) { + m_hasScriptsWaitingForResources = !m_document->isScriptExecutionReady(); + // The parser cannot be unblocked as a microtask requested another resource + if (m_hasScriptsWaitingForResources) + return; + } + } // Clear the pending script before possible rentrancy from executeScript() - RefPtr<Element> element = pendingScript.releaseElementAndClear(); + RefPtrWillBeRawPtr<Element> element = pendingScript.releaseElementAndClear(); if (ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element.get())) { NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel); IgnoreDestructiveWriteCountIncrementer ignoreDestructiveWriteCountIncrementer(m_document); @@ -137,8 +162,8 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi scriptLoader->dispatchErrorEvent(); else { ASSERT(isExecutingScript()); - if (scriptLoader->executePotentiallyCrossOriginScript(sourceCode)) - element->dispatchEvent(createScriptLoadEvent()); + scriptLoader->executeScript(sourceCode); + element->dispatchEvent(createScriptLoadEvent()); } } ASSERT(!isExecutingScript()); @@ -147,20 +172,30 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi void HTMLScriptRunner::watchForLoad(PendingScript& pendingScript) { ASSERT(!pendingScript.watchingForLoad()); - m_host->watchForLoad(pendingScript.resource()); + ASSERT(!pendingScript.resource()->isLoaded()); + // addClient() will call notifyFinished() if the load is complete. + // Callers do not expect to be re-entered from this call, so they + // should not become a client of an already-loaded Resource. + pendingScript.resource()->addClient(this); pendingScript.setWatchingForLoad(true); } void HTMLScriptRunner::stopWatchingForLoad(PendingScript& pendingScript) { ASSERT(pendingScript.watchingForLoad()); - m_host->stopWatchingForLoad(pendingScript.resource()); + pendingScript.resource()->removeClient(this); pendingScript.setWatchingForLoad(false); } -// This function should match 10.2.5.11 "An end tag whose tag name is 'script'" -// Script handling lives outside the tree builder to keep the each class simple. -void HTMLScriptRunner::execute(PassRefPtr<Element> scriptElement, const TextPosition& scriptStartPosition) +void HTMLScriptRunner::notifyFinished(Resource* cachedResource) +{ + m_host->notifyScriptLoaded(cachedResource); +} + +// Implements the steps for 'An end tag whose tag name is "script"' +// http://whatwg.org/html#scriptEndTag +// Script handling lives outside the tree builder to keep each class simple. +void HTMLScriptRunner::execute(PassRefPtrWillBeRawPtr<Element> scriptElement, const TextPosition& scriptStartPosition) { ASSERT(scriptElement); // FIXME: If scripting is disabled, always just return. @@ -207,7 +242,7 @@ void HTMLScriptRunner::executeScriptsWaitingForResources() // to prevent parser or script re-entry during </style> parsing. ASSERT(hasScriptsWaitingForResources()); ASSERT(!isExecutingScript()); - ASSERT(m_document->haveStylesheetsAndImportsLoaded()); + ASSERT(m_document->isScriptExecutionReady()); executeParsingBlockingScripts(); } @@ -222,7 +257,7 @@ bool HTMLScriptRunner::executeScriptsWaitingForParsing() return false; } PendingScript first = m_scriptsToExecuteAfterParsing.takeFirst(); - executePendingScriptAndDispatchEvent(first); + executePendingScriptAndDispatchEvent(first, PendingScriptDeferred); // FIXME: What is this m_document check for? if (!m_document) return false; @@ -268,8 +303,8 @@ bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Elemen return true; } -// This method is meant to match the HTML5 definition of "running a script" -// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#running-a-script +// Implements the initial steps for 'An end tag whose tag name is "script"' +// http://whatwg.org/html#scriptEndTag void HTMLScriptRunner::runScript(Element* script, const TextPosition& scriptStartPosition) { ASSERT(m_document); @@ -285,10 +320,8 @@ void HTMLScriptRunner::runScript(Element* script, const TextPosition& scriptStar if (!scriptLoader) return; - // FIXME: This may be too agressive as we always deliver mutations at - // every script element, even if it's not ready to execute yet. There's - // unfortuantely no obvious way to tell if prepareScript is going to - // execute the script from out here. + ASSERT(scriptLoader->isParserInserted()); + if (!isExecutingScript()) Microtask::performCheckpoint(); @@ -316,4 +349,12 @@ void HTMLScriptRunner::runScript(Element* script, const TextPosition& scriptStar } } +void HTMLScriptRunner::trace(Visitor* visitor) +{ + visitor->trace(m_document); + visitor->trace(m_host); + visitor->trace(m_parserBlockingScript); + visitor->trace(m_scriptsToExecuteAfterParsing); +} + } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.h index 82d8bc2568a..46f2979bca1 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunner.h @@ -27,6 +27,8 @@ #define HTMLScriptRunner_h #include "core/dom/PendingScript.h" +#include "core/fetch/ResourceClient.h" +#include "platform/heap/Handle.h" #include "wtf/Deque.h" #include "wtf/PassRefPtr.h" #include "wtf/text/TextPosition.h" @@ -37,23 +39,23 @@ class Resource; class ScriptResource; class Document; class Element; -class Frame; +class LocalFrame; class HTMLScriptRunnerHost; class ScriptSourceCode; -class HTMLScriptRunner { - WTF_MAKE_NONCOPYABLE(HTMLScriptRunner); WTF_MAKE_FAST_ALLOCATED; +class HTMLScriptRunner FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLScriptRunner>, private ResourceClient { + WTF_MAKE_NONCOPYABLE(HTMLScriptRunner); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; public: - static PassOwnPtr<HTMLScriptRunner> create(Document* document, HTMLScriptRunnerHost* host) + static PassOwnPtrWillBeRawPtr<HTMLScriptRunner> create(Document* document, HTMLScriptRunnerHost* host) { - return adoptPtr(new HTMLScriptRunner(document, host)); + return adoptPtrWillBeNoop(new HTMLScriptRunner(document, host)); } ~HTMLScriptRunner(); void detach(); // Processes the passed in script and any pending scripts if possible. - void execute(PassRefPtr<Element> scriptToProcess, const TextPosition& scriptStartPosition); + void execute(PassRefPtrWillBeRawPtr<Element> scriptToProcess, const TextPosition& scriptStartPosition); void executeScriptsWaitingForLoad(Resource*); bool hasScriptsWaitingForResources() const { return m_hasScriptsWaitingForResources; } @@ -63,13 +65,23 @@ public: bool hasParserBlockingScript() const; bool isExecutingScript() const { return !!m_scriptNestingLevel; } + // ResourceClient + virtual void notifyFinished(Resource*) OVERRIDE; + + void trace(Visitor*); + private: HTMLScriptRunner(Document*, HTMLScriptRunnerHost*); - Frame* frame() const; + LocalFrame* frame() const; + + enum PendingScriptType { + PendingScriptBlockingParser, + PendingScriptDeferred + }; void executeParsingBlockingScript(); - void executePendingScriptAndDispatchEvent(PendingScript&); + void executePendingScriptAndDispatchEvent(PendingScript&, PendingScriptType); void executeParsingBlockingScripts(); void requestParsingBlockingScript(Element*); @@ -84,8 +96,8 @@ private: bool isPendingScriptReady(const PendingScript&); ScriptSourceCode sourceFromPendingScript(const PendingScript&, bool& errorOccurred) const; - Document* m_document; - HTMLScriptRunnerHost* m_host; + RawPtrWillBeMember<Document> m_document; + RawPtrWillBeMember<HTMLScriptRunnerHost> m_host; PendingScript m_parserBlockingScript; Deque<PendingScript> m_scriptsToExecuteAfterParsing; // http://www.whatwg.org/specs/web-apps/current-work/#list-of-scripts-that-will-execute-when-the-document-has-finished-parsing unsigned m_scriptNestingLevel; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunnerHost.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunnerHost.h index e5323a2c3ae..ec3a8b68925 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunnerHost.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLScriptRunnerHost.h @@ -30,20 +30,16 @@ namespace WebCore { -class Resource; -class Element; class HTMLInputStream; -class ScriptSourceCode; +class Resource; +class Visitor; -class HTMLScriptRunnerHost { +class HTMLScriptRunnerHost : public WillBeGarbageCollectedMixin { public: virtual ~HTMLScriptRunnerHost() { } + virtual void trace(Visitor*) { } - // Implementors should call cachedResource->addClient() here or soon after. - virtual void watchForLoad(Resource*) = 0; - // Implementors must call cachedResource->removeClient() immediately. - virtual void stopWatchingForLoad(Resource*) = 0; - + virtual void notifyScriptLoaded(Resource*) = 0; virtual HTMLInputStream& inputStream() = 0; virtual bool hasPreloadScanner() const = 0; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.cpp index 7002dffab3b..f1af160e354 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.cpp @@ -1,4 +1,5 @@ /* + * Copyright (C) 2013 Apple Inc. All rights reserved. * Copyright (C) 2013 Google Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,50 +34,176 @@ #include "core/html/parser/HTMLParserIdioms.h" #include "platform/ParsingUtilities.h" +#include "platform/RuntimeEnabledFeatures.h" namespace WebCore { -static bool compareByScaleFactor(const ImageCandidate& first, const ImageCandidate& second) +static bool compareByDensity(const ImageCandidate& first, const ImageCandidate& second) { - return first.scaleFactor() < second.scaleFactor(); + return first.density() < second.density(); } +enum DescriptorTokenizerState { + Start, + InParenthesis, + AfterToken, +}; + +struct DescriptorToken { + unsigned start; + unsigned length; + + DescriptorToken(unsigned start, unsigned length) + : start(start) + , length(length) + { + } + + unsigned lastIndex() + { + return start + length - 1; + } + + template<typename CharType> + int toInt(const CharType* attribute, bool& isValid) + { + return charactersToInt(attribute + start, length - 1, &isValid); + } + + template<typename CharType> + float toFloat(const CharType* attribute, bool& isValid) + { + return charactersToFloat(attribute + start, length - 1, &isValid); + } +}; + template<typename CharType> -inline bool isComma(CharType character) +static void appendDescriptorAndReset(const CharType* attributeStart, const CharType*& descriptorStart, const CharType* position, Vector<DescriptorToken>& descriptors) { - return character == ','; + if (position > descriptorStart) + descriptors.append(DescriptorToken(descriptorStart - attributeStart, position - descriptorStart)); + descriptorStart = 0; } +// The following is called appendCharacter to match the spec's terminology. template<typename CharType> -static bool parseDescriptors(const CharType* descriptorsStart, const CharType* descriptorsEnd, float& imgScaleFactor) +static void appendCharacter(const CharType* descriptorStart, const CharType* position) { - const CharType* position = descriptorsStart; - bool isValid = true; - bool isScaleFactorFound = false; - while (position < descriptorsEnd) { - // 13.1. Let descriptor list be the result of splitting unparsed descriptors on spaces. - skipWhile<CharType, isHTMLSpace<CharType> >(position, descriptorsEnd); - const CharType* currentDescriptorStart = position; - skipWhile<CharType, isNotHTMLSpace<CharType> >(position, descriptorsEnd); - const CharType* currentDescriptorEnd = position; + // Since we don't copy the tokens, this just set the point where the descriptor tokens start. + if (!descriptorStart) + descriptorStart = position; +} +template<typename CharType> +static bool isEOF(const CharType* position, const CharType* end) +{ + return position >= end; +} + +template<typename CharType> +static void tokenizeDescriptors(const CharType* attributeStart, + const CharType*& position, + const CharType* attributeEnd, + Vector<DescriptorToken>& descriptors) +{ + DescriptorTokenizerState state = Start; + const CharType* descriptorsStart = position; + const CharType* currentDescriptorStart = descriptorsStart; + while (true) { + switch (state) { + case Start: + if (isEOF(position, attributeEnd)) { + appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors); + return; + } + if (isComma(*position)) { + appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors); + ++position; + return; + } + if (isHTMLSpace(*position)) { + appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors); + currentDescriptorStart = position + 1; + state = AfterToken; + } else if (*position == '(') { + appendCharacter(currentDescriptorStart, position); + state = InParenthesis; + } else { + appendCharacter(currentDescriptorStart, position); + } + break; + case InParenthesis: + if (isEOF(position, attributeEnd)) { + appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors); + return; + } + if (*position == ')') { + appendCharacter(currentDescriptorStart, position); + state = Start; + } else { + appendCharacter(currentDescriptorStart, position); + } + break; + case AfterToken: + if (isEOF(position, attributeEnd)) + return; + if (!isHTMLSpace(*position)) { + state = Start; + currentDescriptorStart = position; + --position; + } + break; + } ++position; - ASSERT(currentDescriptorEnd > currentDescriptorStart); - --currentDescriptorEnd; - unsigned descriptorLength = currentDescriptorEnd - currentDescriptorStart; - if (*currentDescriptorEnd == 'x') { - if (isScaleFactorFound) - return false; - imgScaleFactor = charactersToFloat(currentDescriptorStart, descriptorLength, &isValid); - isScaleFactorFound = true; - } else { + } +} + +template<typename CharType> +static bool parseDescriptors(const CharType* attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result) +{ + for (Vector<DescriptorToken>::iterator it = descriptors.begin(); it != descriptors.end(); ++it) { + if (it->length == 0) continue; + CharType c = attribute[it->lastIndex()]; + bool isValid = false; + if (RuntimeEnabledFeatures::pictureSizesEnabled() && c == 'w') { + if (result.hasDensity() || result.hasWidth()) + return false; + int resourceWidth = it->toInt(attribute, isValid); + if (!isValid || resourceWidth <= 0) + return false; + result.setResourceWidth(resourceWidth); + } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && c == 'h') { + // This is here only for future compat purposes. + // The value of the 'h' descriptor is not used. + if (result.hasDensity() || result.hasHeight()) + return false; + int resourceHeight = it->toInt(attribute, isValid); + if (!isValid || resourceHeight <= 0) + return false; + result.setResourceHeight(resourceHeight); + } else if (c == 'x') { + if (result.hasDensity() || result.hasHeight() || result.hasWidth()) + return false; + float density = it->toFloat(attribute, isValid); + if (!isValid || density < 0) + return false; + result.setDensity(density); } } - return isValid; + return true; } -// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#processing-the-image-candidates +static bool parseDescriptors(const String& attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result) +{ + // FIXME: See if StringView can't be extended to replace DescriptorToken here. + if (attribute.is8Bit()) { + return parseDescriptors(attribute.characters8(), descriptors, result); + } + return parseDescriptors(attribute.characters16(), descriptors, result); +} + +// http://picture.responsiveimages.org/#parse-srcset-attr template<typename CharType> static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, const CharType* attributeStart, unsigned length, Vector<ImageCandidate>& imageCandidates) { @@ -84,37 +211,46 @@ static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, con const CharType* attributeEnd = position + length; while (position < attributeEnd) { - float imgScaleFactor = 1.0; - - // 4. Splitting loop: Skip whitespace. - skipWhile<CharType, isHTMLSpace<CharType> >(position, attributeEnd); - if (position == attributeEnd) + // 4. Splitting loop: Collect a sequence of characters that are space characters or U+002C COMMA characters. + skipWhile<CharType, isHTMLSpaceOrComma<CharType> >(position, attributeEnd); + if (position == attributeEnd) { + // Contrary to spec language - descriptor parsing happens on each candidate, so when we reach the attributeEnd, we can exit. break; - const CharType* imageURLStart = position; - - // If The current candidate is either totally empty or only contains space, skipping. - if (*position == ',') { - ++position; - continue; } + const CharType* imageURLStart = position; + // 6. Collect a sequence of characters that are not space characters, and let that be url. - // 5. Collect a sequence of characters that are not space characters, and let that be url. skipUntil<CharType, isHTMLSpace<CharType> >(position, attributeEnd); const CharType* imageURLEnd = position; - if (position != attributeEnd && *(position - 1) == ',') { - --imageURLEnd; + DescriptorParsingResult result; + + // 8. If url ends with a U+002C COMMA character (,) + if (isComma(*(position - 1))) { + // Remove all trailing U+002C COMMA characters from url. + imageURLEnd = position - 1; + reverseSkipWhile<CharType, isComma>(imageURLEnd, imageURLStart); + ++imageURLEnd; + // If url is empty, then jump to the step labeled splitting loop. + if (imageURLStart == imageURLEnd) + continue; } else { - // 7. Collect a sequence of characters that are not "," (U+002C) characters, and let that be descriptors. - skipWhile<CharType, isHTMLSpace<CharType> >(position, attributeEnd); - const CharType* descriptorsStart = position; - skipUntil<CharType, isComma<CharType> >(position, attributeEnd); - const CharType* descriptorsEnd = position; - if (!parseDescriptors(descriptorsStart, descriptorsEnd, imgScaleFactor)) + // Advancing position here (contrary to spec) to avoid an useless extra state machine step. + // Filed a spec bug: https://github.com/ResponsiveImagesCG/picture-element/issues/189 + ++position; + Vector<DescriptorToken> descriptorTokens; + tokenizeDescriptors(attributeStart, position, attributeEnd, descriptorTokens); + // Contrary to spec language - descriptor parsing happens on each candidate. + // This is a black-box equivalent, to avoid storing descriptor lists for each candidate. + if (!parseDescriptors(attribute, descriptorTokens, result)) continue; } - imageCandidates.append(ImageCandidate(attribute, imageURLStart - attributeStart, imageURLEnd - imageURLStart, imgScaleFactor)); + ASSERT(imageURLEnd > attributeStart); + unsigned imageURLStartingPosition = imageURLStart - attributeStart; + ASSERT(imageURLEnd > imageURLStart); + unsigned imageURLLength = imageURLEnd - imageURLStart; + imageCandidates.append(ImageCandidate(attribute, imageURLStartingPosition, imageURLLength, result, ImageCandidate::SrcsetOrigin)); // 11. Return to the step labeled splitting loop. } } @@ -130,36 +266,61 @@ static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, Vec parseImageCandidatesFromSrcsetAttribute<UChar>(attribute, attribute.characters16(), attribute.length(), imageCandidates); } -static ImageCandidate pickBestImageCandidate(float deviceScaleFactor, Vector<ImageCandidate>& imageCandidates) +static ImageCandidate pickBestImageCandidate(float deviceScaleFactor, unsigned sourceSize, Vector<ImageCandidate>& imageCandidates) { + const float defaultDensityValue = 1.0; + bool ignoreSrc = false; if (imageCandidates.isEmpty()) return ImageCandidate(); - std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByScaleFactor); + // http://picture.responsiveimages.org/#normalize-source-densities + for (Vector<ImageCandidate>::iterator it = imageCandidates.begin(); it != imageCandidates.end(); ++it) { + if (it->resourceWidth() > 0) { + it->setDensity((float)it->resourceWidth() / (float)sourceSize); + ignoreSrc = true; + } else if (it->density() < 0) { + it->setDensity(defaultDensityValue); + } + } + + std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByDensity); unsigned i; for (i = 0; i < imageCandidates.size() - 1; ++i) { - if (imageCandidates[i].scaleFactor() >= deviceScaleFactor) + if ((imageCandidates[i].density() >= deviceScaleFactor) && (!ignoreSrc || !imageCandidates[i].srcOrigin())) break; } - return imageCandidates[i]; + + if (imageCandidates[i].srcOrigin() && ignoreSrc) { + ASSERT(i > 0); + --i; + } + float winningDensity = imageCandidates[i].density(); + + unsigned winner = i; + // 16. If an entry b in candidates has the same associated ... pixel density as an earlier entry a in candidates, + // then remove entry b + while ((i > 0) && (imageCandidates[--i].density() == winningDensity)) + winner = i; + + return imageCandidates[winner]; } -ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, const String& srcsetAttribute) +ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, unsigned sourceSize, const String& srcsetAttribute) { Vector<ImageCandidate> imageCandidates; parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates); - return pickBestImageCandidate(deviceScaleFactor, imageCandidates); + return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates); } -ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, const String& srcsetAttribute) +ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, const String& srcsetAttribute) { if (srcsetAttribute.isNull()) { if (srcAttribute.isNull()) return ImageCandidate(); - return ImageCandidate(srcAttribute, 0, srcAttribute.length(), 1); + return ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin); } Vector<ImageCandidate> imageCandidates; @@ -167,12 +328,12 @@ ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, const St parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates); if (!srcAttribute.isEmpty()) - imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), 1.0)); + imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin)); - return pickBestImageCandidate(deviceScaleFactor, imageCandidates); + return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates); } -String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, ImageCandidate& srcsetImageCandidate) +String bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, ImageCandidate& srcsetImageCandidate) { if (srcsetImageCandidate.isEmpty()) return srcAttribute; @@ -181,9 +342,9 @@ String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& sr imageCandidates.append(srcsetImageCandidate); if (!srcAttribute.isEmpty()) - imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), 1.0)); + imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin)); - return pickBestImageCandidate(deviceScaleFactor, imageCandidates).toString(); + return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates).toString(); } } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.h index 8964ffbcd5f..5ba5bff7f4f 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParser.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2013 Apple Inc. All rights reserved. * Copyright (C) 2013 Google Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,27 +36,85 @@ namespace WebCore { +enum { UninitializedDescriptor = -1 }; + +class DescriptorParsingResult { +public: + DescriptorParsingResult() + : m_density(UninitializedDescriptor) + , m_resourceWidth(UninitializedDescriptor) + , m_resourceHeight(UninitializedDescriptor) + { + } + + bool hasDensity() const { return m_density >= 0; } + bool hasWidth() const { return m_resourceWidth >= 0; } + bool hasHeight() const { return m_resourceHeight >= 0; } + + float density() const { ASSERT(hasDensity()); return m_density; } + unsigned resourceWidth() const { ASSERT(hasWidth()); return m_resourceWidth; } + unsigned resourceHeight() const { ASSERT(hasHeight()); return m_resourceHeight; } + + void setResourceWidth(int width) { ASSERT(width >= 0); m_resourceWidth = (unsigned)width; } + void setResourceHeight(int height) { ASSERT(height >= 0); m_resourceHeight = (unsigned)height; } + void setDensity(float densityToSet) { ASSERT(densityToSet >= 0); m_density = densityToSet; } + +private: + float m_density; + int m_resourceWidth; + int m_resourceHeight; +}; + class ImageCandidate { public: + enum OriginAttribute { + SrcsetOrigin, + SrcOrigin + }; + ImageCandidate() - : m_scaleFactor(1.0) + : m_density(1.0) + , m_resourceWidth(UninitializedDescriptor) + , m_originAttribute(SrcsetOrigin) { } - ImageCandidate(const String& source, unsigned start, unsigned length, float scaleFactor) + ImageCandidate(const String& source, unsigned start, unsigned length, const DescriptorParsingResult& result, OriginAttribute originAttribute) : m_string(source.createView(start, length)) - , m_scaleFactor(scaleFactor) + , m_density(result.hasDensity()?result.density():UninitializedDescriptor) + , m_resourceWidth(result.hasWidth()?result.resourceWidth():UninitializedDescriptor) + , m_originAttribute(originAttribute) { } String toString() const { - return m_string.toString(); + return String(m_string.toString()); + } + + AtomicString url() const + { + return AtomicString(m_string.toString()); + } + + void setDensity(float factor) + { + m_density = factor; + } + + float density() const + { + return m_density; + } + + int resourceWidth() const + { + return m_resourceWidth; } - inline float scaleFactor() const + bool srcOrigin() const { - return m_scaleFactor; + return (m_originAttribute == SrcOrigin); } inline bool isEmpty() const @@ -65,14 +124,16 @@ public: private: StringView m_string; - float m_scaleFactor; + float m_density; + int m_resourceWidth; + OriginAttribute m_originAttribute; }; -ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, const String& srcsetAttribute); +ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, unsigned sourceSize, const String& srcsetAttribute); -ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, const String& srcsetAttribute); +ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, const String& srcsetAttribute); -String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, ImageCandidate& srcsetImageCandidate); +String bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, ImageCandidate& srcsetImageCandidate); } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParserTest.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParserTest.cpp new file mode 100644 index 00000000000..76236102f90 --- /dev/null +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLSrcsetParserTest.cpp @@ -0,0 +1,102 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "config.h" +#include "core/html/parser/HTMLSrcsetParser.h" + +#include <gtest/gtest.h> +#include <limits.h> + +namespace WebCore { + +typedef struct { + float deviceScaleFactor; + int effectiveSize; + const char* srcInput; + const char* srcsetInput; + const char* outputURL; + float outputDensity; + int outputResourceWidth; +} TestCase; + +TEST(ImageCandidateTest, Basic) +{ + ImageCandidate candidate; + ASSERT_EQ(candidate.density(), 1); + ASSERT_EQ(candidate.resourceWidth(), -1); + ASSERT_EQ(candidate.srcOrigin(), false); + +} + +TEST(HTMLSrcsetParserTest, Basic) +{ + TestCase testCases[] = { + {2.0, -1, "", "1x.gif 1x, 2x.gif 2x", "2x.gif", 2.0, -1}, + {2.0, -1, "", "1x.gif 1q, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "", "1x.gif 1q, 2x.gif 2x", "1x.gif", 1.0, -1}, + {1.0, -1, "", "1x.gif 1x 100h, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "", "1x.gif 1x 100w, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "", "1x.gif 1x 100h 100w, 2x.gif 2x", "2x.gif", 2.0, -1}, + {2.0, -1, "", "1x.gif 1x, 2x.gif -2x", "1x.gif", 1.0, -1}, + {2.0, -1, "", "0x.gif 0x", "0x.gif", 0.0, -1}, + {2.0, -1, "", "0x.gif -0x", "0x.gif", 0.0, -1}, + {2.0, -1, "", "neg.gif -2x", "", 1.0, -1}, + {2.0, -1, "", "1x.gif 1x, 2x.gif 2q", "1x.gif", 1.0, -1}, + {2.0, -1, "", "1x.gif, 2x.gif 2q", "1x.gif", 1.0, -1}, + {2.0, -1, "", "1x.gif , 2x.gif 2q", "1x.gif", 1.0, -1}, + {2.0, -1, "1x.gif 1x, 2x.gif 2x", "1x.gif 1x, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "1x.gif 1x, 2x.gif 2x", "1x.gif 1x, 2x.gif 2x", "1x.gif", 1.0, -1}, + {1.0, -1, "1x.gif 1x, 2x.gif 2x", "", "1x.gif 1x, 2x.gif 2x", 1.0, -1}, + {2.0, -1, "src.gif", "1x.gif 1x, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "src.gif", "1x.gif 1x, 2x.gif 2x", "1x.gif", 1.0, -1}, + {1.0, -1, "src.gif", "2x.gif 2x", "src.gif", 1.0, -1}, + {2.0, -1, "src.gif", "2x.gif 2x", "2x.gif", 2.0, -1}, + {1.5, -1, "src.gif", "2x.gif 2x", "2x.gif", 2.0, -1}, + {2.5, -1, "src.gif", "2x.gif 2x", "2x.gif", 2.0, -1}, + {2.5, -1, "src.gif", "2x.gif 2x, 3x.gif 3x", "3x.gif", 3.0, -1}, + {2.0, -1, "", "1x,, , x ,2x ", "1x", 1.0, -1}, + {2.0, -1, "", "1x,, , x ,2x ", "1x", 1.0, -1}, + {2.0, -1, "", ",,1x,, , x ,2x ", "1x", 1.0, -1}, + {2.0, -1, "", ",,1x,,", "1x", 1.0, -1}, + {2.0, -1, "", ",1x,", "1x", 1.0, -1}, + {2.0, -1, "", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg 1x, 2x.gif 2x", "2x.gif", 2.0, -1}, + {2.0, -1, "", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg 2x, 1x.gif 1x", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg", 2.0, -1}, + {2.0, -1, "", "1x,, , x ,2x , 1x.gif, 3x, 4x.gif 4x 100z, 5x.gif 5, dx.gif dx, 2x.gif 2x ,", "2x.gif", 2.0, -1}, + {4.0, -1, "", "1x,, , x ,2x , 1x.gif, 3x, 4x.gif 4x 100h, 5x.gif 5, dx.gif dx, 2x.gif 2x ,", "2x.gif", 2.0, -1}, + {4.0, -1, "", "1x,, , x ,2x , 1x.gif, 3x, 4x.gif 4x 100z, 5x.gif 5, dx.gif dx, 2x.gif 2x ,", "4x.gif", 4.0, -1}, + {1.0, -1, "", "1x,, , x ,2x , 1x.gif, 3x, 4x.gif 4x 100z, 5x.gif 5, dx.gif dx, 2x.gif 2x ,", "1x", 1.0, -1}, + {5.0, -1, "", "1x,, , x ,2x , 1x.gif, 3x, 4x.gif 4x 100z, 5x.gif 5, dx.gif dx, 2x.gif 2x ,", "4x.gif", 4.0, -1}, + {2.0, -1, "", "1x.gif 1x, data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIj4KCTxyZWN0IHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIiBmaWxsPSJncmVlbiIvPgo8L3N2Zz4K 2x", "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIj4KCTxyZWN0IHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIiBmaWxsPSJncmVlbiIvPgo8L3N2Zz4K", 2.0, -1 }, + {2.0, -1, "1x.gif", "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIj4KCTxyZWN0IHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIiBmaWxsPSJncmVlbiIvPgo8L3N2Zz4K 2x", "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIj4KCTxyZWN0IHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIiBmaWxsPSJncmVlbiIvPgo8L3N2Zz4K", 2.0, -1 }, + {2.0, -1, "1x.svg#red", "1x.svg#green 2x", "1x.svg#green", 2.0, -1 }, + {2.0, -1, "", "1x.svg#red 1x, 1x.svg#green 2x", "1x.svg#green", 2.0, -1 }, + {1.0, 400, "", "400.gif 400w, 6000.gif 6000w", "400.gif", 1.0, 400}, + {1.0, 400, "", "400.gif 400w 400h, 6000.gif 6000w", "400.gif", 1.0, 400}, + {2.0, 400, "", "400.gif 400w, 6000.gif 6000w", "6000.gif", 15.0, 6000}, + {1.0, 400, "src.gif", "800.gif 800w", "800.gif", 2.0, 800}, + {1.0, 400, "src.gif", "0.gif 0w, 800.gif 800w", "800.gif", 2.0, 800}, + {1.0, 400, "src.gif", "0.gif 0w, 2x.gif 2x", "src.gif", 1.0, -1}, + {1.0, 400, "src.gif", "800.gif 2x, 1600.gif 1600w", "800.gif", 2.0, -1}, + {1.0, 400, "", "400.gif 400w, 2x.gif 2x", "400.gif", 1.0, 400}, + {2.0, 400, "", "400.gif 400w, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, 0, "", "400.gif 400w, 6000.gif 6000w", "400.gif", std::numeric_limits<float>::infinity(), 400}, + {2.0, -1, "", ", 1x.gif 1x, 2x.gif 2x", "2x.gif", 2.0, -1}, + {1.0, -1, "", ",1x.gif 1x, 2x.gif 2x", "1x.gif", 1.0, -1}, + {1.2, -1, "", ",1x.gif 1x, 1.4x.gif 1.4x, 2x.gif 2x", "1.4x.gif", 1.4, -1}, + {1.0, -1, "", "inf.gif 0.00000000001x", "inf.gif", 1e-11, -1}, + {1.0, -1, "", ",1x.gif 1x future-descriptor(3x, 4h, whatever), 2x.gif 2x", "1x.gif", 1.0, -1}, + {2.0, -1, "", ",1x.gif 1x future-descriptor(3x, 4h, whatever), 2x.gif 2x", "2x.gif", 2.0, -1}, + {0, 0, 0, 0, 0, 0} // Do not remove the terminator line. + }; + + for (unsigned i = 0; testCases[i].srcInput; ++i) { + TestCase test = testCases[i]; + ImageCandidate candidate = bestFitSourceForImageAttributes(test.deviceScaleFactor, test.effectiveSize, test.srcInput, test.srcsetInput); + ASSERT_EQ(test.outputDensity, candidate.density()); + ASSERT_EQ(test.outputResourceWidth, candidate.resourceWidth()); + ASSERT_STREQ(test.outputURL, candidate.toString().ascii().data()); + } +} + +} // namespace diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLStackItem.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLStackItem.h index d3dd15ca269..e15c2c119cc 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLStackItem.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLStackItem.h @@ -26,9 +26,9 @@ #ifndef HTMLStackItem_h #define HTMLStackItem_h -#include "HTMLNames.h" -#include "MathMLNames.h" -#include "SVGNames.h" +#include "core/HTMLNames.h" +#include "core/MathMLNames.h" +#include "core/SVGNames.h" #include "core/dom/Element.h" #include "core/html/parser/AtomicHTMLToken.h" #include "wtf/RefCounted.h" @@ -39,7 +39,7 @@ namespace WebCore { class ContainerNode; -class HTMLStackItem : public RefCounted<HTMLStackItem> { +class HTMLStackItem : public RefCountedWillBeGarbageCollectedFinalized<HTMLStackItem> { public: enum ItemType { ItemForContextElement, @@ -47,15 +47,15 @@ public: }; // Used by document fragment node and context element. - static PassRefPtr<HTMLStackItem> create(PassRefPtr<ContainerNode> node, ItemType type) + static PassRefPtrWillBeRawPtr<HTMLStackItem> create(PassRefPtrWillBeRawPtr<ContainerNode> node, ItemType type) { - return adoptRef(new HTMLStackItem(node, type)); + return adoptRefWillBeNoop(new HTMLStackItem(node, type)); } // Used by HTMLElementStack and HTMLFormattingElementList. - static PassRefPtr<HTMLStackItem> create(PassRefPtr<ContainerNode> node, AtomicHTMLToken* token, const AtomicString& namespaceURI = HTMLNames::xhtmlNamespaceURI) + static PassRefPtrWillBeRawPtr<HTMLStackItem> create(PassRefPtrWillBeRawPtr<ContainerNode> node, AtomicHTMLToken* token, const AtomicString& namespaceURI = HTMLNames::xhtmlNamespaceURI) { - return adoptRef(new HTMLStackItem(node, token, namespaceURI)); + return adoptRefWillBeNoop(new HTMLStackItem(node, token, namespaceURI)); } Element* element() const { return toElement(m_node.get()); } @@ -172,7 +172,6 @@ public: || tagName == HTMLNames::iframeTag || tagName == HTMLNames::imgTag || tagName == HTMLNames::inputTag - || tagName == HTMLNames::isindexTag || tagName == HTMLNames::liTag || tagName == HTMLNames::linkTag || tagName == HTMLNames::listingTag @@ -208,8 +207,10 @@ public: || tagName == HTMLNames::xmpTag; } + void trace(Visitor* visitor) { visitor->trace(m_node); } + private: - HTMLStackItem(PassRefPtr<ContainerNode> node, ItemType type) + HTMLStackItem(PassRefPtrWillBeRawPtr<ContainerNode> node, ItemType type) : m_node(node) { switch (type) { @@ -224,7 +225,7 @@ private: } } - HTMLStackItem(PassRefPtr<ContainerNode> node, AtomicHTMLToken* token, const AtomicString& namespaceURI = HTMLNames::xhtmlNamespaceURI) + HTMLStackItem(PassRefPtrWillBeRawPtr<ContainerNode> node, AtomicHTMLToken* token, const AtomicString& namespaceURI = HTMLNames::xhtmlNamespaceURI) : m_node(node) , m_tokenLocalName(token->name()) , m_tokenAttributes(token->attributes()) @@ -233,7 +234,7 @@ private: { } - RefPtr<ContainerNode> m_node; + RefPtrWillBeMember<ContainerNode> m_node; AtomicString m_tokenLocalName; Vector<Attribute> m_tokenAttributes; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLToken.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLToken.h index 64f3044631b..e2b67fc33aa 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLToken.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLToken.h @@ -27,7 +27,6 @@ #define HTMLToken_h #include "core/dom/Attribute.h" -#include "core/html/parser/HTMLToken.h" #include "wtf/PassOwnPtr.h" #include "wtf/RefCounted.h" #include "wtf/RefPtr.h" @@ -104,7 +103,12 @@ public: m_range.start = 0; m_range.end = 0; m_baseOffset = 0; - m_data.clear(); + // Don't call Vector::clear() as that would destroy the + // alloced VectorBuffer. If the innerHTML'd content has + // two 257 character text nodes in a row, we'll needlessly + // thrash malloc. When we finally finish the parse the + // HTMLToken will be destroyed and the VectorBuffer released. + m_data.shrink(0); m_orAllData = 0; } @@ -326,9 +330,7 @@ public: { ASSERT(character); ASSERT(m_type == StartTag || m_type == EndTag); - // FIXME: We should be able to add the following ASSERT once we fix - // https://bugs.webkit.org/show_bug.cgi?id=62971 - // ASSERT(m_currentAttribute->nameRange.start); + ASSERT(m_currentAttribute->nameRange.start); m_currentAttribute->name.append(character); } @@ -426,6 +428,7 @@ public: m_orAllData |= character; } + // Only for XSSAuditor void eraseCharacters() { ASSERT(m_type == Character); diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp index ed8c954a64b..8b03782d45e 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp @@ -28,8 +28,10 @@ #include "config.h" #include "core/html/parser/HTMLTokenizer.h" -#include "HTMLNames.h" +#include "core/HTMLNames.h" +#include "core/HTMLTokenizerNames.h" #include "core/html/parser/HTMLEntityParser.h" +#include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/HTMLTreeBuilder.h" #include "platform/NotImplemented.h" #include "core/xml/parser/MarkupTokenizerInlines.h" @@ -37,7 +39,9 @@ #include "wtf/text/AtomicString.h" #include "wtf/unicode/Unicode.h" -using namespace WTF; +// Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used +// from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. +#undef DEFINE_STATIC_LOCAL namespace WebCore { @@ -1071,11 +1075,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() HTML_BEGIN_STATE(MarkupDeclarationOpenState) { - DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); - DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); - DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA[")); if (cc == '-') { - SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); + SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash); if (result == SegmentedString::DidMatch) { source.advanceAndASSERT('-'); source.advanceAndASSERT('-'); @@ -1084,14 +1085,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } else if (result == SegmentedString::NotEnoughCharacters) return haveBufferedCharacterToken(); } else if (cc == 'D' || cc == 'd') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); + SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::doctype); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "doctype"); HTML_SWITCH_TO(DOCTYPEState); } else if (result == SegmentedString::NotEnoughCharacters) return haveBufferedCharacterToken(); } else if (cc == '[' && shouldAllowCDATA()) { - SegmentedString::LookAheadResult result = source.lookAhead(cdataString); + SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::cdata); if (result == SegmentedString::DidMatch) { advanceStringAndASSERT(source, "[CDATA["); HTML_SWITCH_TO(CDATASectionState); @@ -1274,17 +1275,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->setForceQuirks(); return emitAndReconsumeIn(source, HTMLTokenizer::DataState); } else { - DEFINE_STATIC_LOCAL(String, publicString, ("public")); - DEFINE_STATIC_LOCAL(String, systemString, ("system")); if (cc == 'P' || cc == 'p') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); + SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::publicString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "public"); HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState); } else if (result == SegmentedString::NotEnoughCharacters) return haveBufferedCharacterToken(); } else if (cc == 'S' || cc == 's') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); + SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::system); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "system"); HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState); @@ -1596,20 +1595,20 @@ String HTMLTokenizer::bufferedCharacters() const return characters.toString(); } -void HTMLTokenizer::updateStateFor(const AtomicString& tagName) +void HTMLTokenizer::updateStateFor(const String& tagName) { - if (tagName == textareaTag || tagName == titleTag) + if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag)) setState(HTMLTokenizer::RCDATAState); - else if (tagName == plaintextTag) + else if (threadSafeMatch(tagName, plaintextTag)) setState(HTMLTokenizer::PLAINTEXTState); - else if (tagName == scriptTag) + else if (threadSafeMatch(tagName, scriptTag)) setState(HTMLTokenizer::ScriptDataState); - else if (tagName == styleTag - || tagName == iframeTag - || tagName == xmpTag - || (tagName == noembedTag && m_options.pluginsEnabled) - || tagName == noframesTag - || (tagName == noscriptTag && m_options.scriptEnabled)) + else if (threadSafeMatch(tagName, styleTag) + || threadSafeMatch(tagName, iframeTag) + || threadSafeMatch(tagName, xmpTag) + || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) + || threadSafeMatch(tagName, noframesTag) + || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) setState(HTMLTokenizer::RAWTEXTState); } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h index aa7c059bf78..3a4ca7f96a1 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h @@ -126,7 +126,6 @@ public: State state; UChar additionalAllowedCharacter; bool skipNextNewLine; - bool forceNullCharacterReplacement; bool shouldAllowCDATA; Checkpoint() @@ -134,7 +133,6 @@ public: , state() , additionalAllowedCharacter('\0') , skipNextNewLine(false) - , forceNullCharacterReplacement(false) , shouldAllowCDATA(false) { } @@ -176,7 +174,7 @@ public: // * CDATA sections in foreign content will be tokenized as bogus comments // instead of as character tokens. // - void updateStateFor(const AtomicString& tagName); + void updateStateFor(const String& tagName); bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; } void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizerNames.in b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizerNames.in new file mode 100644 index 00000000000..0eab51f4ea0 --- /dev/null +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTokenizerNames.in @@ -0,0 +1,8 @@ +namespace="HTMLTokenizer" + +-- Symbol=dashDash +doctype +[CDATA[ Symbol=cdata +# The symbol "public" conflicts with the C++ keyword. +public Symbol=publicString +system diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.cpp index 4196cde09f0..092240a2c3b 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.cpp @@ -27,19 +27,17 @@ #include "config.h" #include "core/html/parser/HTMLTreeBuilder.h" -#include "HTMLNames.h" -#include "MathMLNames.h" -#include "SVGNames.h" -#include "XLinkNames.h" -#include "XMLNSNames.h" -#include "XMLNames.h" #include "bindings/v8/ExceptionStatePlaceholder.h" +#include "core/HTMLNames.h" +#include "core/MathMLNames.h" +#include "core/SVGNames.h" +#include "core/XLinkNames.h" +#include "core/XMLNSNames.h" +#include "core/XMLNames.h" #include "core/dom/DocumentFragment.h" +#include "core/dom/ElementTraversal.h" #include "core/html/HTMLDocument.h" #include "core/html/HTMLFormElement.h" -#include "core/html/HTMLHtmlElement.h" -#include "core/html/HTMLOptGroupElement.h" -#include "core/html/HTMLTableElement.h" #include "core/html/parser/AtomicHTMLToken.h" #include "core/html/parser/HTMLDocumentParser.h" #include "core/html/parser/HTMLParserIdioms.h" @@ -136,18 +134,10 @@ static bool isFormattingTag(const AtomicString& tagName) return tagName == aTag || isNonAnchorFormattingTag(tagName); } -static HTMLFormElement* closestFormAncestor(Element* element) +static HTMLFormElement* closestFormAncestor(Element& element) { ASSERT(isMainThread()); - while (element) { - if (element->hasTagName(formTag)) - return toHTMLFormElement(element); - ContainerNode* parent = element->parentNode(); - if (!parent || !parent->isElementNode()) - return 0; - element = toElement(parent); - } - return 0; + return Traversal<HTMLFormElement>::firstAncestorOrSelf(element); } class HTMLTreeBuilder::CharacterTokenBuffer { @@ -305,27 +295,33 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* f , m_options(options) { ASSERT(isMainThread()); - // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed. ASSERT(contextElement); - if (contextElement) { - // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm: - // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case - // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes") - // and instead use the DocumentFragment as a root node. - m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode)); - if (contextElement->hasTagName(templateTag)) - m_templateInsertionModes.append(TemplateContentsMode); + // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm: + // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case + // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes") + // and instead use the DocumentFragment as a root node. + m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode)); - resetInsertionModeAppropriately(); - m_tree.setForm(closestFormAncestor(contextElement)); - } + if (isHTMLTemplateElement(*contextElement)) + m_templateInsertionModes.append(TemplateContentsMode); + + resetInsertionModeAppropriately(); + m_tree.setForm(closestFormAncestor(*contextElement)); } HTMLTreeBuilder::~HTMLTreeBuilder() { } +void HTMLTreeBuilder::trace(Visitor* visitor) +{ + visitor->trace(m_fragmentContext); + visitor->trace(m_tree); + visitor->trace(m_parser); + visitor->trace(m_scriptToProcess); +} + void HTMLTreeBuilder::detach() { #ifndef NDEBUG @@ -339,14 +335,14 @@ void HTMLTreeBuilder::detach() } HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext() - : m_fragment(0) + : m_fragment(nullptr) { } HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement) : m_fragment(fragment) { - ASSERT(!fragment->hasChildNodes()); + ASSERT(!fragment->hasChildren()); m_contextElementStackItem = HTMLStackItem::create(contextElement, HTMLStackItem::ItemForContextElement); } @@ -354,7 +350,13 @@ HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext() { } -PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition) +void HTMLTreeBuilder::FragmentParsingContext::trace(Visitor* visitor) +{ + visitor->trace(m_fragment); + visitor->trace(m_contextElementStackItem); +} + +PassRefPtrWillBeRawPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition) { ASSERT(m_scriptToProcess); ASSERT(!m_tree.hasPendingTasks()); @@ -461,13 +463,6 @@ void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName) processFakeEndTag(tagName.localName()); } -void HTMLTreeBuilder::processFakeCharacters(const String& characters) -{ - ASSERT(!characters.isEmpty()); - CharacterTokenBuffer buffer(characters); - processCharacterBuffer(buffer); -} - void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope() { if (!m_tree.openElements()->inButtonScope(pTag.localName())) @@ -476,49 +471,6 @@ void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope() processEndTag(&endP); } -Vector<Attribute> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken* token) -{ - Vector<Attribute> attributes = token->attributes(); - for (int i = attributes.size() - 1; i >= 0; --i) { - const QualifiedName& name = attributes.at(i).name(); - if (name.matches(nameAttr) || name.matches(actionAttr) || name.matches(promptAttr)) - attributes.remove(i); - } - - attributes.append(Attribute(nameAttr, isindexTag.localName())); - return attributes; -} - -void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken* token) -{ - ASSERT(token->type() == HTMLToken::StartTag); - ASSERT(token->name() == isindexTag); - - if (m_parser->useCounter()) - m_parser->useCounter()->count(UseCounter::IsIndexElement); - - parseError(token); - if (m_tree.form()) - return; - notImplemented(); // Acknowledge self-closing flag - processFakeStartTag(formTag); - Attribute* actionAttribute = token->getAttributeItem(actionAttr); - if (actionAttribute) - m_tree.form()->setAttribute(actionAttr, actionAttribute->value()); - processFakeStartTag(hrTag); - processFakeStartTag(labelTag); - Attribute* promptAttribute = token->getAttributeItem(promptAttr); - if (promptAttribute) - processFakeCharacters(promptAttribute->value()); - else - processFakeCharacters(Locale::defaultLocale().queryString(blink::WebLocalizedString::SearchableIndexIntroduction)); - processFakeStartTag(inputTag, attributesForIsindexInput(token)); - notImplemented(); // This second set of characters may be needed by non-english locales. - processFakeEndTag(labelTag); - processFakeStartTag(hrTag); - processFakeEndTag(formTag); -} - namespace { bool isLi(const HTMLStackItem* item) @@ -540,7 +492,7 @@ void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token) m_framesetOk = false; HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); while (1) { - RefPtr<HTMLStackItem> item = nodeRecord->stackItem(); + RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem(); if (shouldClose(item.get())) { ASSERT(item->isElementNode()); processFakeEndTag(item->localName()); @@ -572,8 +524,8 @@ static void adjustSVGTagNameCase(AtomicHTMLToken* token) static PrefixedNameToQualifiedNameMap* caseMap = 0; if (!caseMap) { caseMap = new PrefixedNameToQualifiedNameMap; - const QualifiedName* const* svgTags = SVGNames::getSVGTags(); - mapLoweredLocalNameToName(caseMap, svgTags, SVGNames::SVGTagsCount); + OwnPtr<const QualifiedName*[]> svgTags = SVGNames::getSVGTags(); + mapLoweredLocalNameToName(caseMap, svgTags.get(), SVGNames::SVGTagsCount); } const QualifiedName& casedName = caseMap->get(token->name()); @@ -582,14 +534,14 @@ static void adjustSVGTagNameCase(AtomicHTMLToken* token) token->setName(casedName.localName()); } -template<const QualifiedName* const* getAttrs(), unsigned length> +template<PassOwnPtr<const QualifiedName*[]> getAttrs(), unsigned length> static void adjustAttributes(AtomicHTMLToken* token) { static PrefixedNameToQualifiedNameMap* caseMap = 0; if (!caseMap) { caseMap = new PrefixedNameToQualifiedNameMap; - const QualifiedName* const* attrs = getAttrs(); - mapLoweredLocalNameToName(caseMap, attrs, length); + OwnPtr<const QualifiedName*[]> attrs = getAttrs(); + mapLoweredLocalNameToName(caseMap, attrs.get(), length); } for (unsigned i = 0; i < token->attributes().size(); ++i) { @@ -627,11 +579,11 @@ static void adjustForeignAttributes(AtomicHTMLToken* token) if (!map) { map = new PrefixedNameToQualifiedNameMap; - const QualifiedName* const* attrs = XLinkNames::getXLinkAttrs(); - addNamesWithPrefix(map, xlinkAtom, attrs, XLinkNames::XLinkAttrsCount); + OwnPtr<const QualifiedName*[]> attrs = XLinkNames::getXLinkAttrs(); + addNamesWithPrefix(map, xlinkAtom, attrs.get(), XLinkNames::XLinkAttrsCount); - attrs = XMLNames::getXMLAttrs(); - addNamesWithPrefix(map, xmlAtom, attrs, XMLNames::XMLAttrsCount); + OwnPtr<const QualifiedName*[]> xmlAttrs = XMLNames::getXMLAttrs(); + addNamesWithPrefix(map, xmlAtom, xmlAttrs.get(), XMLNames::XMLAttrsCount); map->add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr); map->add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI)); @@ -859,10 +811,6 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token) m_framesetOk = false; return; } - if (token->name() == isindexTag) { - processIsindexStartTagForInBody(token); - return; - } if (token->name() == textareaTag) { m_tree.insertHTMLElement(token); m_shouldSkipLeadingNewline = true; @@ -969,7 +917,7 @@ bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token) { ASSERT(token->name() == templateTag.localName()); if (!m_tree.openElements()->hasTemplateInHTMLScope()) { - ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && m_fragmentContext.contextElement()->hasTagName(templateTag))); + ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && isHTMLTemplateElement(m_fragmentContext.contextElement()))); parseError(token); return false; } @@ -995,7 +943,7 @@ bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* tok bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup() { - if (m_tree.currentIsRootNode() || m_tree.currentNode()->hasTagName(templateTag)) { + if (m_tree.currentIsRootNode() || isHTMLTemplateElement(*m_tree.currentNode())) { ASSERT(isParsingFragmentOrTemplateContents()); // FIXME: parse error return false; @@ -1396,7 +1344,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token) AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); processEndTag(&endOption); } - if (isHTMLOptGroupElement(m_tree.currentStackItem()->node())) { + if (m_tree.currentStackItem()->hasTagName(optgroupTag)) { AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName()); processEndTag(&endOptgroup); } @@ -1505,7 +1453,7 @@ void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token) ASSERT(token->type() == HTMLToken::EndTag); HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); while (1) { - RefPtr<HTMLStackItem> item = record->stackItem(); + RefPtrWillBeRawPtr<HTMLStackItem> item = record->stackItem(); if (item->matchesHTMLTag(token->name())) { m_tree.generateImpliedEndTagsWithExclusion(token->name()); if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) @@ -1564,7 +1512,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token) } // 7. ASSERT(furthestBlock->isAbove(formattingElementRecord)); - RefPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem(); + RefPtrWillBeRawPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem(); // 8. HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement); // 9. @@ -1587,7 +1535,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token) if (node == formattingElementRecord) break; // 9.7 - RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get()); + RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get()); HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element()); nodeEntry->replaceElement(newItem); @@ -1604,7 +1552,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token) // 10. m_tree.insertAlreadyParsedChild(commonAncestor.get(), lastNode); // 11. - RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get()); + RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get()); // 12. m_tree.takeAllChildren(newItem.get(), furthestBlock); // 13. @@ -1623,7 +1571,7 @@ void HTMLTreeBuilder::resetInsertionModeAppropriately() bool last = false; HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); while (1) { - RefPtr<HTMLStackItem> item = nodeRecord->stackItem(); + RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem(); if (item->node() == m_tree.openElements()->rootNode()) { last = true; if (isParsingFragment()) @@ -1636,7 +1584,7 @@ void HTMLTreeBuilder::resetInsertionModeAppropriately() while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) { nodeRecord = nodeRecord->next(); item = nodeRecord->stackItem(); - if (isHTMLTableElement(item->node())) + if (item->hasTagName(tableTag)) return setInsertionMode(InSelectInTableMode); } } @@ -1653,7 +1601,7 @@ void HTMLTreeBuilder::resetInsertionModeAppropriately() if (item->hasTagName(colgroupTag)) { return setInsertionMode(InColumnGroupMode); } - if (isHTMLTableElement(item->node())) + if (item->hasTagName(tableTag)) return setInsertionMode(InTableMode); if (item->hasTagName(headTag)) { if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node()) @@ -1665,7 +1613,7 @@ void HTMLTreeBuilder::resetInsertionModeAppropriately() if (item->hasTagName(framesetTag)) { return setInsertionMode(InFramesetMode); } - if (isHTMLHtmlElement(item->node())) { + if (item->hasTagName(htmlTag)) { if (m_tree.headStackItem()) return setInsertionMode(AfterHeadMode); @@ -1839,7 +1787,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token) return; } if (token->name() == formTag) { - RefPtr<Element> node = m_tree.takeForm(); + RefPtrWillBeRawPtr<Element> node = m_tree.takeForm(); if (!node || !m_tree.openElements()->inScope(node.get())) { parseError(token); return; @@ -2212,9 +2160,9 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token) case InSelectMode: ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode); if (token->name() == optgroupTag) { - if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && isHTMLOptGroupElement(m_tree.oneBelowTop()->node())) + if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag)) processFakeEndTag(optionTag); - if (isHTMLOptGroupElement(m_tree.currentStackItem()->node())) { + if (m_tree.currentStackItem()->hasTagName(optgroupTag)) { m_tree.openElements()->pop(); return; } @@ -2366,11 +2314,11 @@ ReprocessBuffer: ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode); ASSERT(m_pendingTableCharacters.isEmpty()); if (m_tree.currentStackItem()->isElementNode() - && (isHTMLTableElement(m_tree.currentStackItem()->node()) - || m_tree.currentStackItem()->hasTagName(HTMLNames::tbodyTag) - || m_tree.currentStackItem()->hasTagName(HTMLNames::tfootTag) - || m_tree.currentStackItem()->hasTagName(HTMLNames::theadTag) - || m_tree.currentStackItem()->hasTagName(HTMLNames::trTag))) { + && (m_tree.currentStackItem()->hasTagName(tableTag) + || m_tree.currentStackItem()->hasTagName(tbodyTag) + || m_tree.currentStackItem()->hasTagName(tfootTag) + || m_tree.currentStackItem()->hasTagName(theadTag) + || m_tree.currentStackItem()->hasTagName(trTag))) { m_originalInsertionMode = m_insertionMode; setInsertionMode(InTableTextMode); // Note that we fall through to the InTableTextMode case below. @@ -2407,7 +2355,6 @@ ReprocessBuffer: // FIXME: parse error setInsertionMode(InBodyMode); goto ReprocessBuffer; - break; } case TextMode: { ASSERT(insertionMode() == TextMode); @@ -2423,7 +2370,6 @@ ReprocessBuffer: return; defaultForInHeadNoscript(); goto ReprocessBuffer; - break; } case InFramesetMode: case AfterFramesetMode: { @@ -2514,7 +2460,7 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token) ASSERT(isParsingFragment()); return; // FIXME: Should we break here instead of returning? } - ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || m_tree.currentNode()->hasTagName(templateTag)); + ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || isHTMLTemplateElement(m_tree.currentNode())); processColgroupEndTagForInColumnGroup(); // Fall through case InFramesetMode: diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.h index 60ead95f286..9c9dcdfae8d 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.h @@ -30,6 +30,7 @@ #include "core/html/parser/HTMLConstructionSite.h" #include "core/html/parser/HTMLElementStack.h" #include "core/html/parser/HTMLParserOptions.h" +#include "platform/heap/Handle.h" #include "wtf/Noncopyable.h" #include "wtf/PassOwnPtr.h" #include "wtf/PassRefPtr.h" @@ -44,24 +45,25 @@ class AtomicHTMLToken; class Document; class DocumentFragment; class Element; -class Frame; +class LocalFrame; class HTMLToken; class HTMLDocument; class Node; class HTMLDocumentParser; -class HTMLTreeBuilder { - WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED; +class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> { + WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; public: - static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options) + static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options) { - return adoptPtr(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options)); + return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options)); } - static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options) + static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options) { - return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options)); + return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options)); } ~HTMLTreeBuilder(); + void trace(Visitor*); const HTMLElementStack* openElements() const { return m_tree.openElements(); } @@ -75,7 +77,7 @@ public: bool hasParserBlockingScript() const { return !!m_scriptToProcess; } // Must be called to take the parser-blocking script before calling the parser again. - PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); + PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); // Done, close any open tags, etc. void finished(); @@ -153,7 +155,6 @@ private: void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>()); void processFakeEndTag(const QualifiedName&); void processFakeEndTag(const AtomicString&); - void processFakeCharacters(const String&); void processFakePEndTagIfPInButtonScope(); void processGenericRCDATAStartTag(AtomicHTMLToken*); @@ -195,6 +196,7 @@ private: class FragmentParsingContext { WTF_MAKE_NONCOPYABLE(FragmentParsingContext); + DISALLOW_ALLOCATION(); public: FragmentParsingContext(); FragmentParsingContext(DocumentFragment*, Element* contextElement); @@ -204,9 +206,11 @@ private: Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); } HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); } + void trace(Visitor*); + private: - DocumentFragment* m_fragment; - RefPtr<HTMLStackItem> m_contextElementStackItem; + RawPtrWillBeMember<DocumentFragment> m_fragment; + RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem; }; bool m_framesetOk; @@ -231,9 +235,9 @@ private: // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer // from within parser actions. We also need it to track the current position. - HTMLDocumentParser* m_parser; + RawPtrWillBeMember<HTMLDocumentParser> m_parser; - RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. + RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. HTMLParserOptions m_options; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp index 37273cbaea0..2f369953c05 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp @@ -26,9 +26,9 @@ #include "config.h" #include "core/html/parser/HTMLTreeBuilderSimulator.h" -#include "HTMLNames.h" -#include "MathMLNames.h" -#include "SVGNames.h" +#include "core/HTMLNames.h" +#include "core/MathMLNames.h" +#include "core/SVGNames.h" #include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/HTMLTokenizer.h" #include "core/html/parser/HTMLTreeBuilder.h" diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.cpp index b46ff5ad2e9..d504a4c5703 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.cpp @@ -27,14 +27,16 @@ #include "core/html/parser/HTMLViewSourceParser.h" #include "core/dom/DOMImplementation.h" +#include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/HTMLParserOptions.h" #include "core/html/parser/HTMLToken.h" +#include "core/html/parser/XSSAuditorDelegate.h" namespace WebCore { -HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document, const String& mimeType) +HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument& document, const String& mimeType) : DecodedDataDocumentParser(document) - , m_tokenizer(HTMLTokenizer::create(HTMLParserOptions(document))) + , m_tokenizer(HTMLTokenizer::create(HTMLParserOptions(&document))) { if (mimeType != "text/html" && !DOMImplementation::isXMLMIMEType(mimeType)) m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); @@ -42,17 +44,21 @@ HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document, con void HTMLViewSourceParser::pumpTokenizer() { + m_xssAuditor.init(document(), 0); + while (true) { m_sourceTracker.start(m_input.current(), m_tokenizer.get(), m_token); if (!m_tokenizer->nextToken(m_input.current(), m_token)) return; m_sourceTracker.end(m_input.current(), m_tokenizer.get(), m_token); - document()->addSource(m_sourceTracker.sourceForToken(m_token), m_token); + OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA())); + HTMLViewSourceDocument::SourceAnnotation annotation = xssInfo ? HTMLViewSourceDocument::AnnotateSourceAsXSS : HTMLViewSourceDocument::AnnotateSourceAsSafe; + document()->addSource(m_sourceTracker.sourceForToken(m_token), m_token, annotation); // FIXME: The tokenizer should do this work for us. if (m_token.type() == HTMLToken::StartTag) - m_tokenizer->updateStateFor(AtomicString(m_token.name())); + m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit)); m_token.clear(); } } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.h b/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.h index d1f0bec23c8..853387be4ab 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/HTMLViewSourceParser.h @@ -31,19 +31,20 @@ #include "core/html/parser/HTMLInputStream.h" #include "core/html/parser/HTMLSourceTracker.h" #include "core/html/parser/HTMLTokenizer.h" +#include "core/html/parser/XSSAuditor.h" namespace WebCore { class HTMLViewSourceParser FINAL : public DecodedDataDocumentParser { public: - static PassRefPtr<HTMLViewSourceParser> create(HTMLViewSourceDocument* document, const String& mimeType) + static PassRefPtrWillBeRawPtr<HTMLViewSourceParser> create(HTMLViewSourceDocument& document, const String& mimeType) { - return adoptRef(new HTMLViewSourceParser(document, mimeType)); + return adoptRefWillBeNoop(new HTMLViewSourceParser(document, mimeType)); } virtual ~HTMLViewSourceParser() { } private: - HTMLViewSourceParser(HTMLViewSourceDocument*, const String& mimeType); + HTMLViewSourceParser(HTMLViewSourceDocument&, const String& mimeType); // DocumentParser virtual void insert(const SegmentedString&) OVERRIDE { ASSERT_NOT_REACHED(); } @@ -59,6 +60,7 @@ private: HTMLToken m_token; HTMLSourceTracker m_sourceTracker; OwnPtr<HTMLTokenizer> m_tokenizer; + XSSAuditor m_xssAuditor; }; } diff --git a/chromium/third_party/WebKit/Source/core/html/parser/NestingLevelIncrementer.h b/chromium/third_party/WebKit/Source/core/html/parser/NestingLevelIncrementer.h index fa1d110bdf3..478210e394e 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/NestingLevelIncrementer.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/NestingLevelIncrementer.h @@ -26,11 +26,13 @@ #ifndef NestingLevelIncrementer_h #define NestingLevelIncrementer_h +#include "platform/heap/Handle.h" #include "wtf/Noncopyable.h" namespace WebCore { class NestingLevelIncrementer { + STACK_ALLOCATED(); WTF_MAKE_NONCOPYABLE(NestingLevelIncrementer); public: explicit NestingLevelIncrementer(unsigned& nestingLevel) diff --git a/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.cpp b/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.cpp index 38175a0f6bf..b7fc5151e67 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.cpp @@ -25,14 +25,14 @@ #include "config.h" #include "core/html/parser/TextDocumentParser.h" -#include "HTMLNames.h" +#include "core/HTMLNames.h" #include "core/html/parser/HTMLTreeBuilder.h" namespace WebCore { using namespace HTMLNames; -TextDocumentParser::TextDocumentParser(HTMLDocument* document) +TextDocumentParser::TextDocumentParser(HTMLDocument& document) : HTMLDocumentParser(document, false) , m_haveInsertedFakePreElement(false) { @@ -42,11 +42,11 @@ TextDocumentParser::~TextDocumentParser() { } -void TextDocumentParser::append(PassRefPtr<StringImpl> text) +void TextDocumentParser::appendBytes(const char* data, size_t length) { if (!m_haveInsertedFakePreElement) insertFakePreElement(); - HTMLDocumentParser::append(text); + HTMLDocumentParser::appendBytes(data, length); } void TextDocumentParser::insertFakePreElement() diff --git a/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.h b/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.h index 6b5bb9e6a4c..d1de29543dc 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/TextDocumentParser.h @@ -30,18 +30,18 @@ namespace WebCore { -class TextDocumentParser : public HTMLDocumentParser { +class TextDocumentParser FINAL : public HTMLDocumentParser { public: - static PassRefPtr<TextDocumentParser> create(HTMLDocument* document) + static PassRefPtrWillBeRawPtr<TextDocumentParser> create(HTMLDocument& document) { - return adoptRef(new TextDocumentParser(document)); + return adoptRefWillBeNoop(new TextDocumentParser(document)); } virtual ~TextDocumentParser(); private: - explicit TextDocumentParser(HTMLDocument*); + explicit TextDocumentParser(HTMLDocument&); - virtual void append(PassRefPtr<StringImpl>); + virtual void appendBytes(const char*, size_t) OVERRIDE; void insertFakePreElement(); bool m_haveInsertedFakePreElement; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp new file mode 100644 index 00000000000..1f21e244f2a --- /dev/null +++ b/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp @@ -0,0 +1,439 @@ +/* + Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. + Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + + +#include "config.h" +#include "core/html/parser/TextResourceDecoder.h" + +#include "core/HTMLNames.h" +#include "core/dom/DOMImplementation.h" +#include "core/html/parser/HTMLMetaCharsetParser.h" +#include "platform/text/TextEncodingDetector.h" +#include "wtf/StringExtras.h" +#include "wtf/text/TextCodec.h" +#include "wtf/text/TextEncodingRegistry.h" + +using namespace WTF; + +namespace WebCore { + +using namespace HTMLNames; + +static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4) +{ + return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4; +} + +static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5) +{ + return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5; +} + +static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7) +{ + return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7; +} + +static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9) +{ + return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; +} + +// You might think we should put these find functions elsewhere, perhaps with the +// similar functions that operate on UChar, but arguably only the decoder has +// a reason to process strings of char rather than UChar. + +static int find(const char* subject, size_t subjectLength, const char* target) +{ + size_t targetLength = strlen(target); + if (targetLength > subjectLength) + return -1; + for (size_t i = 0; i <= subjectLength - targetLength; ++i) { + bool match = true; + for (size_t j = 0; j < targetLength; ++j) { + if (subject[i + j] != target[j]) { + match = false; + break; + } + } + if (match) + return i; + } + return -1; +} + +static WTF::TextEncoding findTextEncoding(const char* encodingName, int length) +{ + Vector<char, 64> buffer(length + 1); + memcpy(buffer.data(), encodingName, length); + buffer[length] = '\0'; + return buffer.data(); +} + +TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType) +{ + if (equalIgnoringCase(mimeType, "text/css")) + return CSSContent; + if (equalIgnoringCase(mimeType, "text/html")) + return HTMLContent; + if (DOMImplementation::isXMLMIMEType(mimeType)) + return XMLContent; + return PlainTextContent; +} + +const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding) +{ + // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII + // for text/xml. This matches Firefox. + if (contentType == XMLContent) + return UTF8Encoding(); + if (!specifiedDefaultEncoding.isValid()) + return Latin1Encoding(); + return specifiedDefaultEncoding; +} + +TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) + : m_contentType(determineContentType(mimeType)) + , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) + , m_source(DefaultEncoding) + , m_hintEncoding(0) + , m_checkedForBOM(false) + , m_checkedForCSSCharset(false) + , m_checkedForXMLCharset(false) + , m_checkedForMetaCharset(false) + , m_useLenientXMLDecoding(false) + , m_sawError(false) + , m_usesEncodingDetector(usesEncodingDetector) +{ +} + +TextResourceDecoder::~TextResourceDecoder() +{ +} + +void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) +{ + // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). + if (!encoding.isValid()) + return; + + // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR), + // treat x-user-defined as windows-1252 (bug 18270) + if (source == EncodingFromMetaTag && !strcasecmp(encoding.name(), "x-user-defined")) + m_encoding = "windows-1252"; + else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset) + m_encoding = encoding.closestByteBasedEquivalent(); + else + m_encoding = encoding; + + m_codec.clear(); + m_source = source; +} + +// Returns the position of the encoding string. +static int findXMLEncoding(const char* str, int len, int& encodingLength) +{ + int pos = find(str, len, "encoding"); + if (pos == -1) + return -1; + pos += 8; + + // Skip spaces and stray control characters. + while (pos < len && str[pos] <= ' ') + ++pos; + + // Skip equals sign. + if (pos >= len || str[pos] != '=') + return -1; + ++pos; + + // Skip spaces and stray control characters. + while (pos < len && str[pos] <= ' ') + ++pos; + + // Skip quotation mark. + if (pos >= len) + return - 1; + char quoteMark = str[pos]; + if (quoteMark != '"' && quoteMark != '\'') + return -1; + ++pos; + + // Find the trailing quotation mark. + int end = pos; + while (end < len && str[end] != quoteMark) + ++end; + if (end >= len) + return -1; + + encodingLength = end - pos; + return pos; +} + +size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) +{ + // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. + // We let it override even a user-chosen encoding. + ASSERT(!m_checkedForBOM); + + size_t lengthOfBOM = 0; + + size_t bufferLength = m_buffer.size(); + + size_t buf1Len = bufferLength; + size_t buf2Len = len; + const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); + const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); + unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; + + // Check for the BOM. + if (c1 == 0xFF && c2 == 0xFE) { + if (c3 || c4) { + setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); + lengthOfBOM = 2; + } else { + setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); + lengthOfBOM = 4; + } + } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { + setEncoding(UTF8Encoding(), AutoDetectedEncoding); + lengthOfBOM = 3; + } else if (c1 == 0xFE && c2 == 0xFF) { + setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); + lengthOfBOM = 2; + } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { + setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); + lengthOfBOM = 4; + } + + if (lengthOfBOM || bufferLength + len >= 4) + m_checkedForBOM = true; + + return lengthOfBOM; +} + +bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer) +{ + if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) { + m_checkedForCSSCharset = true; + return true; + } + + size_t oldSize = m_buffer.size(); + m_buffer.grow(oldSize + len); + memcpy(m_buffer.data() + oldSize, data, len); + + movedDataToBuffer = true; + + if (m_buffer.size() <= 13) // strlen('@charset "x";') == 13 + return false; + + const char* dataStart = m_buffer.data(); + const char* dataEnd = dataStart + m_buffer.size(); + + if (bytesEqual(dataStart, '@', 'c', 'h', 'a', 'r', 's', 'e', 't', ' ', '"')) { + dataStart += 10; + const char* pos = dataStart; + + while (pos < dataEnd && *pos != '"') + ++pos; + if (pos == dataEnd) + return false; + + int encodingNameLength = pos - dataStart; + + ++pos; + if (pos == dataEnd) + return false; + + if (*pos == ';') + setEncoding(findTextEncoding(dataStart, encodingNameLength), EncodingFromCSSCharset); + } + + m_checkedForCSSCharset = true; + return true; +} + +bool TextResourceDecoder::checkForXMLCharset(const char* data, size_t len, bool& movedDataToBuffer) +{ + if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) { + m_checkedForXMLCharset = true; + return true; + } + + // This is not completely efficient, since the function might go + // through the HTML head several times. + + size_t oldSize = m_buffer.size(); + m_buffer.grow(oldSize + len); + memcpy(m_buffer.data() + oldSize, data, len); + + movedDataToBuffer = true; + + const char* ptr = m_buffer.data(); + const char* pEnd = ptr + m_buffer.size(); + + // Is there enough data available to check for XML declaration? + if (m_buffer.size() < 8) + return false; + + // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents. + // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case. + if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { + const char* xmlDeclarationEnd = ptr; + while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') + ++xmlDeclarationEnd; + if (xmlDeclarationEnd == pEnd) + return false; + // No need for +1, because we have an extra "?" to lose at the end of XML declaration. + int len = 0; + int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); + if (pos != -1) + setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); + // continue looking for a charset - it may be specified in an HTTP-Equiv meta + } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { + setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); + } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { + setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); + } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { + setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); + } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { + setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); + } + + m_checkedForXMLCharset = true; + return true; +} + +void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length) +{ + if (m_source == UserChosenEncoding || m_source == EncodingFromHTTPHeader || m_source == AutoDetectedEncoding) { + m_checkedForMetaCharset = true; + return; + } + + if (!m_charsetParser) + m_charsetParser = HTMLMetaCharsetParser::create(); + + if (!m_charsetParser->checkForMetaCharset(data, length)) + return; + + setEncoding(m_charsetParser->encoding(), EncodingFromMetaTag); + m_charsetParser.clear(); + m_checkedForMetaCharset = true; + return; +} + +// We use the encoding detector in two cases: +// 1. Encoding detector is turned ON and no other encoding source is +// available (that is, it's DefaultEncoding). +// 2. Encoding detector is turned ON and the encoding is set to +// the encoding of the parent frame, which is also auto-detected. +// Note that condition #2 is NOT satisfied unless parent-child frame +// relationship is compliant to the same-origin policy. If they're from +// different domains, |m_source| would not be set to EncodingFromParentFrame +// in the first place. +bool TextResourceDecoder::shouldAutoDetect() const +{ + // Just checking m_hintEncoding suffices here because it's only set + // in setHintEncoding when the source is AutoDetectedEncoding. + return m_usesEncodingDetector + && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); +} + +String TextResourceDecoder::decode(const char* data, size_t len) +{ + size_t lengthOfBOM = 0; + if (!m_checkedForBOM) + lengthOfBOM = checkForBOM(data, len); + + bool movedDataToBuffer = false; + + if (m_contentType == CSSContent && !m_checkedForCSSCharset) { + if (!checkForCSSCharset(data, len, movedDataToBuffer)) + return emptyString(); + } + + if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) { + if (!checkForXMLCharset(data, len, movedDataToBuffer)) + return emptyString(); + } + + const char* dataForDecode = data + lengthOfBOM; + size_t lengthForDecode = len - lengthOfBOM; + + if (!m_buffer.isEmpty()) { + if (!movedDataToBuffer) { + size_t oldSize = m_buffer.size(); + m_buffer.grow(oldSize + len); + memcpy(m_buffer.data() + oldSize, data, len); + } + + dataForDecode = m_buffer.data() + lengthOfBOM; + lengthForDecode = m_buffer.size() - lengthOfBOM; + } + + if (m_contentType == HTMLContent && !m_checkedForMetaCharset) + checkForMetaCharset(dataForDecode, lengthForDecode); + + if (shouldAutoDetect()) { + WTF::TextEncoding detectedEncoding; + if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) + setEncoding(detectedEncoding, EncodingFromContentSniffing); + } + + ASSERT(m_encoding.isValid()); + + if (!m_codec) + m_codec = newTextCodec(m_encoding); + + String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); + + m_buffer.clear(); + return result; +} + +String TextResourceDecoder::flush() +{ + // If we can not identify the encoding even after a document is completely + // loaded, we need to detect the encoding if other conditions for + // autodetection is satisfied. + if (m_buffer.size() && shouldAutoDetect() + && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { + WTF::TextEncoding detectedEncoding; + if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) + setEncoding(detectedEncoding, EncodingFromContentSniffing); + } + + if (!m_codec) + m_codec = newTextCodec(m_encoding); + + String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); + m_buffer.clear(); + m_codec.clear(); + m_checkedForBOM = false; // Skip BOM again when re-decoding. + return result; +} + +} diff --git a/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h b/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h new file mode 100644 index 00000000000..5ad2ad561f8 --- /dev/null +++ b/chromium/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h @@ -0,0 +1,106 @@ +/* + Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) + Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) + Copyright (C) 2006, 2008 Apple Inc. All rights reserved. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. + +*/ + +#ifndef TextResourceDecoder_h +#define TextResourceDecoder_h + +#include "wtf/RefCounted.h" +#include "wtf/text/TextEncoding.h" + +namespace WebCore { + +class DocumentEncodingData; +class HTMLMetaCharsetParser; + +class TextResourceDecoder { +public: + enum EncodingSource { + DefaultEncoding, + AutoDetectedEncoding, + EncodingFromContentSniffing, + EncodingFromXMLHeader, + EncodingFromMetaTag, + EncodingFromCSSCharset, + EncodingFromHTTPHeader, + UserChosenEncoding, + EncodingFromParentFrame + }; + + static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) + { + return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); + } + ~TextResourceDecoder(); + + void setEncoding(const WTF::TextEncoding&, EncodingSource); + const WTF::TextEncoding& encoding() const { return m_encoding; } + bool encodingWasDetectedHeuristically() const + { + return m_source == AutoDetectedEncoding + || m_source == EncodingFromContentSniffing; + } + + String decode(const char* data, size_t length); + String flush(); + + void setHintEncoding(const WTF::TextEncoding& encoding) + { + m_hintEncoding = encoding.name(); + } + + void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } + bool sawError() const { return m_sawError; } + +private: + TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); + + enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. + static ContentType determineContentType(const String& mimeType); + static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); + + size_t checkForBOM(const char*, size_t); + bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); + bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); + void checkForMetaCharset(const char*, size_t); + void detectJapaneseEncoding(const char*, size_t); + bool shouldAutoDetect() const; + + ContentType m_contentType; + WTF::TextEncoding m_encoding; + OwnPtr<TextCodec> m_codec; + EncodingSource m_source; + const char* m_hintEncoding; + Vector<char> m_buffer; + bool m_checkedForBOM; + bool m_checkedForCSSCharset; + bool m_checkedForXMLCharset; + bool m_checkedForMetaCharset; + bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. + bool m_sawError; + bool m_usesEncodingDetector; + + OwnPtr<HTMLMetaCharsetParser> m_charsetParser; +}; + +} + +#endif diff --git a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp index a43fca982b5..5189447af88 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp @@ -27,22 +27,23 @@ #include "config.h" #include "core/html/parser/XSSAuditor.h" -#include "HTMLNames.h" -#include "SVGNames.h" -#include "XLinkNames.h" +#include "core/HTMLNames.h" +#include "core/SVGNames.h" +#include "core/XLinkNames.h" #include "core/dom/Document.h" -#include "core/fetch/TextResourceDecoder.h" -#include "core/frame/ContentSecurityPolicy.h" -#include "core/frame/Frame.h" +#include "core/frame/LocalFrame.h" +#include "core/frame/csp/ContentSecurityPolicy.h" #include "core/html/HTMLParamElement.h" #include "core/html/parser/HTMLDocumentParser.h" #include "core/html/parser/HTMLParserIdioms.h" +#include "core/html/parser/TextResourceDecoder.h" #include "core/html/parser/XSSAuditorDelegate.h" #include "core/loader/DocumentLoader.h" #include "core/frame/Settings.h" #include "platform/JSONValues.h" #include "platform/network/FormData.h" #include "platform/text/DecodeEscapeSequences.h" +#include "wtf/ASCIICType.h" #include "wtf/MainThread.h" namespace { @@ -63,14 +64,11 @@ static bool isNonCanonicalCharacter(UChar c) // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the // adverse effect that we remove any legitimate zeros from a string. + // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg, + // a//b becomes a/b. // - // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). - return (c == '\\' || c == '0' || c == '\0' || c >= 127); -} - -static String canonicalize(const String& string) -{ - return string.removeCharacters(&isNonCanonicalCharacter); + // For instance: new String("http://localhost:8000") => new String("http:localhost:8"). + return (c == '\\' || c == '0' || c == '\0' || c == '/' || c >= 127); } static bool isRequiredForInjection(UChar c) @@ -96,17 +94,28 @@ static bool isJSNewline(UChar c) static bool startsHTMLCommentAt(const String& string, size_t start) { - return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-'); + return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-'); } static bool startsSingleLineCommentAt(const String& string, size_t start) { - return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/'); + return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/'); } static bool startsMultiLineCommentAt(const String& string, size_t start) { - return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '*'); + return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*'); +} + +static bool startsOpeningScriptTagAt(const String& string, size_t start) +{ + return start + 6 < string.length() && string[start] == '<' + && WTF::toASCIILowerUnchecked(string[start + 1]) == 's' + && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c' + && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r' + && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i' + && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p' + && WTF::toASCIILowerUnchecked(string[start + 6]) == 't'; } // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h @@ -171,10 +180,60 @@ static String fullyDecodeString(const String& string, const WTF::TextEncoding& e workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding)); } while (workingString.length() < oldWorkingStringLength); workingString.replace('+', ' '); - workingString = canonicalize(workingString); return workingString; } +static void truncateForSrcLikeAttribute(String& decodedSnippet) +{ + // In HTTP URLs, characters following the first ?, #, or third slash may come from + // the page itself and can be merely ignored by an attacker's server when a remote + // script or script-like resource is requested. In DATA URLS, the payload starts at + // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters + // following this may come from the page itself and may be ignored when the script is + // executed. For simplicity, we don't differentiate based on URL scheme, and stop at + // the first # or ?, the third slash, or the first slash or < once a comma is seen. + int slashCount = 0; + bool commaSeen = false; + for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { + UChar currentChar = decodedSnippet[currentLength]; + if (currentChar == '?' + || currentChar == '#' + || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) + || (currentChar == '<' && commaSeen)) { + decodedSnippet.truncate(currentLength); + return; + } + if (currentChar == ',') + commaSeen = true; + } +} + +static void truncateForScriptLikeAttribute(String& decodedSnippet) +{ + // Beware of trailing characters which came from the page itself, not the + // injected vector. Excluding the terminating character covers common cases + // where the page immediately ends the attribute, but doesn't cover more + // complex cases where there is other page data following the injection. + // Generally, these won't parse as javascript, so the injected vector + // typically excludes them from consideration via a single-line comment or + // by enclosing them in a string literal terminated later by the page's own + // closing punctuation. Since the snippet has not been parsed, the vector + // may also try to introduce these via entities. As a result, we'd like to + // stop before the first "//", the first <!--, the first entity, or the first + // quote not immediately following the first equals sign (taking whitespace + // into consideration). To keep things simpler, we don't try to distinguish + // between entity-introducing amperands vs. other uses, nor do we bother to + // check for a second slash for a comment, nor do we bother to check for + // !-- following a less-than sign. We stop instead on any ampersand + // slash, or less-than sign. + size_t position = 0; + if ((position = decodedSnippet.find("=")) != kNotFound + && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound + && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) { + decodedSnippet.truncate(position); + } +} + static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS) { ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS); @@ -190,15 +249,16 @@ static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute) return threadSafeMatch(attribute.name, SVGNames::valuesAttr); } -static bool semicolonSeparatedValueContainsJavaScriptURL(const String& value) +static String semicolonSeparatedValueContainingJavaScriptURL(const String& value) { Vector<String> valueList; value.split(';', valueList); for (size_t i = 0; i < valueList.size(); ++i) { - if (protocolIsJavaScript(valueList[i])) - return true; + String stripped = stripLeadingAndTrailingHTMLSpaces(valueList[i]); + if (protocolIsJavaScript(stripped)) + return stripped; } - return false; + return emptyString(); } XSSAuditor::XSSAuditor() @@ -227,9 +287,6 @@ void XSSAuditor::initForFragment() void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) { - const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter. - const int suffixTreeDepth = 5; - ASSERT(isMainThread()); if (m_state != Uninitialized) return; @@ -243,7 +300,7 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) m_documentURL = document->url().copy(); - // In theory, the Document could have detached from the Frame after the + // In theory, the Document could have detached from the LocalFrame after the // XSSAuditor was constructed. if (!document->frame()) { m_isEnabled = false; @@ -264,11 +321,6 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) if (document->encoding().isValid()) m_encoding = document->encoding(); - m_decodedURL = fullyDecodeString(m_documentURL.string(), m_encoding); - if (m_decodedURL.find(isRequiredForInjection) == kNotFound) - m_decodedURL = String(); - - String httpBodyAsString; if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) { DEFINE_STATIC_LOCAL(const AtomicString, XSSProtectionHeader, ("X-XSS-Protection", AtomicString::ConstructFromLiteral)); const AtomicString& headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader); @@ -298,23 +350,40 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) // FIXME: Combine the two report URLs in some reasonable way. if (auditorDelegate) auditorDelegate->setReportURL(xssProtectionReportURL.copy()); + FormData* httpBody = documentLoader->request().httpBody(); - if (httpBody && !httpBody->isEmpty()) { - httpBodyAsString = httpBody->flattenToString(); - if (!httpBodyAsString.isEmpty()) { - m_decodedHTTPBody = fullyDecodeString(httpBodyAsString, m_encoding); - if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound) - m_decodedHTTPBody = String(); - if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree) - m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth)); - } - } + if (httpBody && !httpBody->isEmpty()) + m_httpBodyAsString = httpBody->flattenToString(); } - if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) { - m_isEnabled = false; + setEncoding(m_encoding); +} + +void XSSAuditor::setEncoding(const WTF::TextEncoding& encoding) +{ + const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter. + const int suffixTreeDepth = 5; + + if (!encoding.isValid()) return; + + m_encoding = encoding; + + m_decodedURL = canonicalize(m_documentURL.string(), NoTruncation); + if (m_decodedURL.find(isRequiredForInjection) == kNotFound) + m_decodedURL = String(); + + if (!m_httpBodyAsString.isEmpty()) { + m_decodedHTTPBody = canonicalize(m_httpBodyAsString, NoTruncation); + m_httpBodyAsString = String(); + if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound) + m_decodedHTTPBody = String(); + if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree) + m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth)); } + + if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) + m_isEnabled = false; } PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request) @@ -392,7 +461,7 @@ bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) return false; if ((m_state == SuppressingAdjacentCharacterTokens) - || (m_scriptTagFoundInRequest && isContainedInRequest(decodedSnippetForJavaScript(request)))) { + || (m_scriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request)))) { request.token.eraseCharacters(); request.token.appendToCharacter(' '); // Technically, character tokens can't be empty. m_state = SuppressingAdjacentCharacterTokens; @@ -409,10 +478,10 @@ bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, scriptTag)); bool didBlockScript = false; - m_scriptTagFoundInRequest = isContainedInRequest(decodedSnippetForName(request)); + m_scriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request)); if (m_scriptTagFoundInRequest) { - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); - didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttribute); + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation); + didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttributeTruncation); } return didBlockScript; } @@ -423,8 +492,8 @@ bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, objectTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttributeTruncation); didBlockScript |= eraseAttributeIfInjected(request, typeAttr); didBlockScript |= eraseAttributeIfInjected(request, classidAttr); } @@ -444,7 +513,7 @@ bool XSSAuditor::filterParamToken(const FilterTokenRequest& request) if (!HTMLParamElement::isURLParameter(String(nameAttribute.value))) return false; - return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttribute); + return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttributeTruncation); } bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) @@ -453,9 +522,9 @@ bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, embedTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation); + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation); didBlockScript |= eraseAttributeIfInjected(request, typeAttr); } return didBlockScript; @@ -467,8 +536,8 @@ bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, appletTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation); didBlockScript |= eraseAttributeIfInjected(request, objectAttr); } return didBlockScript; @@ -479,9 +548,9 @@ bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag)); - bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttribute); - if (isContainedInRequest(decodedSnippetForName(request))) - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttribute); + bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttributeTruncation); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttributeTruncation); return didBlockScript; } @@ -515,7 +584,7 @@ bool XSSAuditor::filterInputToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, inputTag)); - return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute); + return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation); } bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) @@ -523,7 +592,7 @@ bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, buttonTag)); - return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute); + return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation); } bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request) @@ -532,14 +601,24 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& re bool didBlockScript = false; for (size_t i = 0; i < request.token.attributes().size(); ++i) { + bool eraseAttribute = false; + bool valueContainsJavaScriptURL = false; const HTMLToken::Attribute& attribute = request.token.attributes().at(i); - bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.name); - // FIXME: It would be better if we didn't create a new String for every attribute in the document. - String strippedValue = stripLeadingAndTrailingHTMLSpaces(String(attribute.value)); - bool valueContainsJavaScriptURL = (!isInlineEventHandler && protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue)); - if (!isInlineEventHandler && !valueContainsJavaScriptURL) - continue; - if (!isContainedInRequest(decodedSnippetForAttribute(request, attribute, ScriptLikeAttribute))) + // FIXME: Don't create a new String for every attribute.value in the document. + if (isNameOfInlineEventHandler(attribute.name)) { + eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation)); + } else if (isSemicolonSeparatedAttribute(attribute)) { + String subValue = semicolonSeparatedValueContainingJavaScriptURL(String(attribute.value)); + if (!subValue.isEmpty()) { + valueContainsJavaScriptURL = true; + eraseAttribute = isContainedInRequest(canonicalize(nameFromAttribute(request, attribute), NoTruncation)) + && isContainedInRequest(canonicalize(subValue, ScriptLikeAttributeTruncation)); + } + } else if (protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.value)))) { + valueContainsJavaScriptURL = true; + eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation)); + } + if (!eraseAttribute) continue; request.token.eraseValueOfAttribute(i); if (valueContainsJavaScriptURL) @@ -549,96 +628,79 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& re return didBlockScript; } -bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment) +bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationKind treatment) { size_t indexOfAttribute = 0; - if (findAttributeWithName(request.token, attributeName, indexOfAttribute)) { - const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); - if (isContainedInRequest(decodedSnippetForAttribute(request, attribute, treatment))) { - if (threadSafeMatch(attributeName, srcAttr) && isLikelySafeResource(String(attribute.value))) - return false; - if (threadSafeMatch(attributeName, http_equivAttr) && !isDangerousHTTPEquiv(String(attribute.value))) - return false; - request.token.eraseValueOfAttribute(indexOfAttribute); - if (!replacementValue.isEmpty()) - request.token.appendToAttributeValue(indexOfAttribute, replacementValue); - return true; - } + if (!findAttributeWithName(request.token, attributeName, indexOfAttribute)) + return false; + + const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); + if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), treatment))) + return false; + + if (threadSafeMatch(attributeName, srcAttr)) { + if (isLikelySafeResource(String(attribute.value))) + return false; + } else if (threadSafeMatch(attributeName, http_equivAttr)) { + if (!isDangerousHTTPEquiv(String(attribute.value))) + return false; } - return false; + + request.token.eraseValueOfAttribute(indexOfAttribute); + if (!replacementValue.isEmpty()) + request.token.appendToAttributeValue(indexOfAttribute, replacementValue); + + return true; } -String XSSAuditor::decodedSnippetForName(const FilterTokenRequest& request) +String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request) { // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<"). - return fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1); + return canonicalize(request.sourceTracker.sourceForToken(request.token).substring(0, request.token.name().size() + 1), NoTruncation); +} + +String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute) +{ + // The range inlcudes the character which terminates the name. So, + // for an input of |name="value"|, the snippet is |name=|. + int start = attribute.nameRange.start - request.token.startIndex(); + int end = attribute.valueRange.start - request.token.startIndex(); + return request.sourceTracker.sourceForToken(request.token).substring(start, end - start); } -String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute, AttributeKind treatment) +String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute) { - // The range doesn't inlcude the character which terminates the value. So, + // The range doesn't include the character which terminates the value. So, // for an input of |name="value"|, the snippet is |name="value|. For an // unquoted input of |name=value |, the snippet is |name=value|. // FIXME: We should grab one character before the name also. int start = attribute.nameRange.start - request.token.startIndex(); int end = attribute.valueRange.end - request.token.startIndex(); - String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding); - decodedSnippet.truncate(kMaximumFragmentLengthTarget); - if (treatment == SrcLikeAttribute) { - int slashCount = 0; - bool commaSeen = false; - // In HTTP URLs, characters following the first ?, #, or third slash may come from - // the page itself and can be merely ignored by an attacker's server when a remote - // script or script-like resource is requested. In DATA URLS, the payload starts at - // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters - // following this may come from the page itself and may be ignored when the script is - // executed. For simplicity, we don't differentiate based on URL scheme, and stop at - // the first # or ?, the third slash, or the first slash or < once a comma is seen. - for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { - UChar currentChar = decodedSnippet[currentLength]; - if (currentChar == '?' - || currentChar == '#' - || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) - || (currentChar == '<' && commaSeen)) { - decodedSnippet.truncate(currentLength); - break; - } - if (currentChar == ',') - commaSeen = true; - } - } else if (treatment == ScriptLikeAttribute) { - // Beware of trailing characters which came from the page itself, not the - // injected vector. Excluding the terminating character covers common cases - // where the page immediately ends the attribute, but doesn't cover more - // complex cases where there is other page data following the injection. - // Generally, these won't parse as javascript, so the injected vector - // typically excludes them from consideration via a single-line comment or - // by enclosing them in a string literal terminated later by the page's own - // closing punctuation. Since the snippet has not been parsed, the vector - // may also try to introduce these via entities. As a result, we'd like to - // stop before the first "//", the first <!--, the first entity, or the first - // quote not immediately following the first equals sign (taking whitespace - // into consideration). To keep things simpler, we don't try to distinguish - // between entity-introducing amperands vs. other uses, nor do we bother to - // check for a second slash for a comment, nor do we bother to check for - // !-- following a less-than sign. We stop instead on any ampersand - // slash, or less-than sign. - size_t position = 0; - if ((position = decodedSnippet.find("=")) != kNotFound - && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound - && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) { - decodedSnippet.truncate(position); - } + return request.sourceTracker.sourceForToken(request.token).substring(start, end - start); +} + +String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) +{ + String decodedSnippet = fullyDecodeString(snippet, m_encoding); + + if (treatment != NoTruncation) { + decodedSnippet.truncate(kMaximumFragmentLengthTarget); + if (treatment == SrcLikeAttributeTruncation) + truncateForSrcLikeAttribute(decodedSnippet); + else if (treatment == ScriptLikeAttributeTruncation) + truncateForScriptLikeAttribute(decodedSnippet); } - return decodedSnippet; + + return decodedSnippet.removeCharacters(&isNonCanonicalCharacter); } -String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request) +String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request) { String string = request.sourceTracker.sourceForToken(request.token); size_t startPosition = 0; size_t endPosition = string.length(); size_t foundPosition = kNotFound; + size_t lastNonSpacePosition = kNotFound; // Skip over initial comments to find start of code. while (startPosition < endPosition) { @@ -667,32 +729,40 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request String result; while (startPosition < endPosition && !result.length()) { - // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we - // encounter a comma, or when we exceed the maximum length target. The comma rule - // covers a common parameter concatenation case performed by some webservers. - // After hitting the length target, we can only stop at a point where we know we are - // not in the middle of a %-escape sequence. For the sake of simplicity, approximate - // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on - // whitespace only. We should have enough text in these cases to avoid false positives. + // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, + // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule + // covers a common parameter concatenation case performed by some web servers. + lastNonSpacePosition = kNotFound; for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) { if (!request.shouldAllowCDATA) { - if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) { - foundPosition += 2; - break; - } - if (startsHTMLCommentAt(string, foundPosition)) { - foundPosition += 4; + if (startsSingleLineCommentAt(string, foundPosition) + || startsMultiLineCommentAt(string, foundPosition) + || startsHTMLCommentAt(string, foundPosition)) { break; } } - if (string[foundPosition] == ',' || (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace<UChar>(string[foundPosition]))) { + if (string[foundPosition] == ',') + break; + + if (lastNonSpacePosition != kNotFound && startsOpeningScriptTagAt(string, foundPosition)) { + foundPosition = lastNonSpacePosition; break; } + if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { + // After hitting the length target, we can only stop at a point where we know we are + // not in the middle of a %-escape sequence. For the sake of simplicity, approximate + // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on + // whitespace only. We should have enough text in these cases to avoid false positives. + if (isHTMLSpace<UChar>(string[foundPosition])) + break; + } + if (!isHTMLSpace<UChar>(string[foundPosition])) + lastNonSpacePosition = foundPosition; } - - result = fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding); + result = canonicalize(string.substring(startPosition, foundPosition - startPosition), NoTruncation); startPosition = foundPosition + 1; } + return result; } @@ -732,7 +802,8 @@ bool XSSAuditor::isSafeToSendToAnotherThread() const { return m_documentURL.isSafeToSendToAnotherThread() && m_decodedURL.isSafeToSendToAnotherThread() - && m_decodedHTTPBody.isSafeToSendToAnotherThread(); + && m_decodedHTTPBody.isSafeToSendToAnotherThread() + && m_httpBodyAsString.isSafeToSendToAnotherThread(); } } // namespace WebCore diff --git a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.h b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.h index db2655f4a8a..53a38ac606d 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.h +++ b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditor.h @@ -64,6 +64,8 @@ public: PassOwnPtr<XSSInfo> filterToken(const FilterTokenRequest&); bool isSafeToSendToAnotherThread() const; + void setEncoding(const WTF::TextEncoding&); + private: static const size_t kMaximumFragmentLengthTarget = 100; @@ -74,10 +76,11 @@ private: SuppressingAdjacentCharacterTokens }; - enum AttributeKind { - NormalAttribute, - SrcLikeAttribute, - ScriptLikeAttribute + enum TruncationKind { + NoTruncation, + NormalAttributeTruncation, + SrcLikeAttributeTruncation, + ScriptLikeAttributeTruncation }; bool filterStartToken(const FilterTokenRequest&); @@ -96,12 +99,13 @@ private: bool filterButtonToken(const FilterTokenRequest&); bool eraseDangerousAttributesIfInjected(const FilterTokenRequest&); - bool eraseAttributeIfInjected(const FilterTokenRequest&, const QualifiedName&, const String& replacementValue = String(), AttributeKind treatment = NormalAttribute); + bool eraseAttributeIfInjected(const FilterTokenRequest&, const QualifiedName&, const String& replacementValue = String(), TruncationKind treatment = NormalAttributeTruncation); - String decodedSnippetForToken(const HTMLToken&); - String decodedSnippetForName(const FilterTokenRequest&); - String decodedSnippetForAttribute(const FilterTokenRequest&, const HTMLToken::Attribute&, AttributeKind treatment = NormalAttribute); - String decodedSnippetForJavaScript(const FilterTokenRequest&); + String canonicalizedSnippetForTagName(const FilterTokenRequest&); + String canonicalizedSnippetForJavaScript(const FilterTokenRequest&); + String nameFromAttribute(const FilterTokenRequest&, const HTMLToken::Attribute&); + String snippetFromAttribute(const FilterTokenRequest&, const HTMLToken::Attribute&); + String canonicalize(String, TruncationKind); bool isContainedInRequest(const String&); bool isLikelySafeResource(const String& url); @@ -115,6 +119,7 @@ private: String m_decodedURL; String m_decodedHTTPBody; + String m_httpBodyAsString; OwnPtr<SuffixTree<ASCIICodebook> > m_decodedHTTPBodySuffixTree; State m_state; diff --git a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditorDelegate.cpp b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditorDelegate.cpp index 5ea9b66ccf7..9b329e85ccd 100644 --- a/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditorDelegate.cpp +++ b/chromium/third_party/WebKit/Source/core/html/parser/XSSAuditorDelegate.cpp @@ -27,7 +27,7 @@ #include "core/html/parser/XSSAuditorDelegate.h" #include "core/dom/Document.h" -#include "core/frame/Frame.h" +#include "core/frame/LocalFrame.h" #include "core/loader/DocumentLoader.h" #include "core/loader/FrameLoader.h" #include "core/loader/FrameLoaderClient.h" @@ -100,8 +100,8 @@ void XSSAuditorDelegate::didBlockScript(const XSSInfo& xssInfo) m_document->addConsoleMessage(JSMessageSource, ErrorMessageLevel, xssInfo.buildConsoleError()); - // stopAllLoaders can detach the Frame, so protect it. - RefPtr<Frame> protect(m_document->frame()); + // stopAllLoaders can detach the LocalFrame, so protect it. + RefPtr<LocalFrame> protect(m_document->frame()); FrameLoader& frameLoader = m_document->frame()->loader(); if (xssInfo.m_didBlockEntirePage) frameLoader.stopAllLoaders(); @@ -116,7 +116,7 @@ void XSSAuditorDelegate::didBlockScript(const XSSInfo& xssInfo) } if (xssInfo.m_didBlockEntirePage) - m_document->frame()->navigationScheduler().scheduleLocationChange(m_document, SecurityOrigin::urlWithUniqueSecurityOrigin(), String()); + m_document->frame()->navigationScheduler().scheduleLocationChange(m_document, SecurityOrigin::urlWithUniqueSecurityOrigin(), Referrer()); } } // namespace WebCore diff --git a/chromium/third_party/WebKit/Source/core/html/parser/create-html-entity-table b/chromium/third_party/WebKit/Source/core/html/parser/create-html-entity-table index 8c408fead9a..587e8a8f5d1 100755 --- a/chromium/third_party/WebKit/Source/core/html/parser/create-html-entity-table +++ b/chromium/third_party/WebKit/Source/core/html/parser/create-html-entity-table @@ -27,6 +27,11 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +"""This python script creates the raw data that is our entity +database. The representation is one string database containing all +strings we could need, and then a mapping from offset+length -> entity +data. That is compact, easy to use and efficient.""" + import csv import os.path import string @@ -35,17 +40,6 @@ import sys ENTITY = 0 VALUE = 1 -def convert_entity_to_cpp_name(entity): - postfix = "EntityName" - if entity[-1] == ";": - return "%sSemicolon%s" % (entity[:-1], postfix) - return "%s%s" % (entity, postfix) - - -def convert_entity_to_uchar_array(entity): - return "{'%s'}" % "', '".join(entity) - - def convert_value_to_int(value): if not value: return "0"; @@ -67,9 +61,8 @@ if len(sys.argv) < 4 or sys.argv[1] != "-o": output_path = sys.argv[2] input_path = sys.argv[3] -html_entity_names_file = open(input_path) -entries = list(csv.reader(html_entity_names_file)) -html_entity_names_file.close() +with open(input_path) as html_entity_names_file: + entries = list(csv.reader(html_entity_names_file)) entries.sort(key = lambda entry: entry[ENTITY]) entity_count = len(entries) @@ -101,7 +94,7 @@ output_file.write("""/* * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// THIS FILE IS GENERATED BY WebCore/html/parser/create-html-entity-table +// THIS FILE IS GENERATED BY core/html/parser/create-html-entity-table // DO NOT EDIT (unless you are a ninja)! #include "config.h" @@ -112,62 +105,126 @@ namespace WebCore { namespace { """) +assert len(entries) > 0, "Code assumes a non-empty entity array." +def check_ascii(entity_string): + for c in entity_string: + code = ord(c) + assert 0 <= code <= 127, (c + " is not ASCII. Need to change type " + + "of storage from LChar to UChar to support " + + "this entity.") + +output_file.write("static const LChar staticEntityStringStorage[] = {\n") +output_file.write("'") +all_data = "" +entity_offset = 0 +first_output = True +saved_by_reusing = 0 for entry in entries: - output_file.write("static const UChar %s[] = %s;\n" % ( - convert_entity_to_cpp_name(entry[ENTITY]), - convert_entity_to_uchar_array(entry[ENTITY]))) + check_ascii(entry[ENTITY]) + # Reuse substrings from earlier entries. This saves 1-2000 + # characters, but it's O(n^2) and not very smart. The optimal + # solution has to solve the "Shortest Common Superstring" problem + # and that is NP-Complete or worse. + # + # This would be even more efficient if we didn't store the + # semi-colon in the array but as a bit in the entry. + entity = entry[ENTITY] + already_existing_offset = all_data.find(entity) + if already_existing_offset != -1: + # Reusing space. + this_offset = already_existing_offset + saved_by_reusing += len(entity) + else: + if not first_output: + output_file.write(",\n'") + first_output = False + + # Try the end of the string and see if we can reuse that to + # fit the start of the new entity. + data_to_add = entity + this_offset = entity_offset + for truncated_len in range(len(entity) - 1, 0, -1): + if all_data.endswith(entity[:truncated_len]): + data_to_add = entity[truncated_len:] + this_offset = entity_offset - truncated_len + saved_by_reusing += truncated_len + break + + output_file.write("', '".join(data_to_add)) + all_data += data_to_add + output_file.write("'") + entity_offset += len(data_to_add) + assert len(entry) == 2, "We will use slot [2] in the list for the offset." + assert this_offset < 32768 # Stored in a 16 bit short. + entry.append(this_offset) + +output_file.write("};\n") + +index = {} +for offset, entry in enumerate(entries): + starting_letter = entry[ENTITY][0] + if starting_letter not in index: + index[starting_letter] = offset output_file.write(""" static const HTMLEntityTableEntry staticEntityTable[%s] = {\n""" % entity_count) -index = {} -offset = 0 for entry in entries: - letter = entry[ENTITY][0] - if letter not in index: - index[letter] = offset values = entry[VALUE].split(' ') assert len(values) <= 2, values - output_file.write(' { %s, %s, %s, %s },\n' % ( - convert_entity_to_cpp_name(entry[ENTITY]), - len(entry[ENTITY]), + output_file.write(' { %s, %s, %s, %s }, // &%s\n' % ( convert_value_to_int(values[0]), - convert_value_to_int(values[1] if len(values) >= 2 else ""))) - offset += 1 + convert_value_to_int(values[1] if len(values) >= 2 else ""), + entry[2], + len(entry[ENTITY]), + entry[ENTITY], + )) output_file.write("""}; """) -output_file.write("static const HTMLEntityTableEntry* uppercaseOffset[] = {\n") +output_file.write(""" +} +""") + +output_file.write("static const short uppercaseOffset[] = {\n") for letter in string.ascii_uppercase: - output_file.write("%s\n" % offset_table_entry(index[letter])) -output_file.write("%s\n" % offset_table_entry(index['a'])) + output_file.write("%d,\n" % index[letter]) +output_file.write("%d\n" % index['a']) output_file.write("""}; -static const HTMLEntityTableEntry* lowercaseOffset[] = {\n""") +static const short lowercaseOffset[] = {\n""") for letter in string.ascii_lowercase: - output_file.write("%s\n" % offset_table_entry(index[letter])) -output_file.write("%s\n" % offset_table_entry(entity_count)) + output_file.write("%d,\n" % index[letter]) +output_file.write("%d\n" % entity_count) output_file.write("""}; +const LChar* HTMLEntityTable::entityString(const HTMLEntityTableEntry& entry) +{ + return staticEntityStringStorage + entry.entityOffset; +} + +LChar HTMLEntityTableEntry::lastCharacter() const +{ + return HTMLEntityTable::entityString(*this)[length - 1]; } const HTMLEntityTableEntry* HTMLEntityTable::firstEntryStartingWith(UChar c) { if (c >= 'A' && c <= 'Z') - return uppercaseOffset[c - 'A']; + return &staticEntityTable[uppercaseOffset[c - 'A']]; if (c >= 'a' && c <= 'z') - return lowercaseOffset[c - 'a']; + return &staticEntityTable[lowercaseOffset[c - 'a']]; return 0; } const HTMLEntityTableEntry* HTMLEntityTable::lastEntryStartingWith(UChar c) { if (c >= 'A' && c <= 'Z') - return uppercaseOffset[c - 'A' + 1] - 1; + return &staticEntityTable[uppercaseOffset[c - 'A' + 1]] - 1; if (c >= 'a' && c <= 'z') - return lowercaseOffset[c - 'a' + 1] - 1; + return &staticEntityTable[lowercaseOffset[c - 'a' + 1]] - 1; return 0; } |