diff options
Diffstat (limited to 'chromium/net/filter/sdch_filter.cc')
-rw-r--r-- chromium/net/filter/sdch_filter.cc | 394
1 file changed, 394 insertions(+), 0 deletions(-)
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/filter/sdch_filter.h"

#include <ctype.h>
#include <limits.h>

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "net/base/sdch_manager.h"
#include "net/url_request/url_request_context.h"

#include "sdch/open-vcdiff/src/google/vcdecoder.h"

namespace net {

// Constructs an SDCH filter bound to the given context. Caches the response's
// MIME type and URL (both DCHECKed to be available) and the context's
// SdchManager-owning URLRequestContext for later dictionary lookup and
// blacklisting decisions.
SdchFilter::SdchFilter(const FilterContext& filter_context)
    : filter_context_(filter_context),
      decoding_status_(DECODING_UNINITIALIZED),
      dictionary_hash_(),
      dictionary_hash_is_plausible_(false),
      dictionary_(NULL),
      url_request_context_(filter_context.GetURLRequestContext()),
      dest_buffer_excess_(),
      dest_buffer_excess_index_(0),
      source_bytes_(0),
      output_bytes_(0),
      possible_pass_through_(false) {
  bool success = filter_context.GetMimeType(&mime_type_);
  DCHECK(success);
  success = filter_context.GetURL(&url_);
  DCHECK(success);
  DCHECK(url_request_context_->sdch_manager());
}

SdchFilter::~SdchFilter() {
  // All code here is for gathering stats, and can be removed when SDCH is
  // considered stable.

  static int filter_use_count = 0;
  ++filter_use_count;
  if (META_REFRESH_RECOVERY == decoding_status_) {
    UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
  }

  if (vcdiff_streaming_decoder_.get()) {
    // A decoder that can't finish means the stream was truncated mid-decode.
    if (!vcdiff_streaming_decoder_->FinishDecoding()) {
      decoding_status_ = DECODING_ERROR;
      SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
      // Make it possible for the user to hit reload, and get non-sdch content.
      // Note this will "wear off" quickly enough, and is just meant to assure
      // in some rare case that the user is not stuck.
      url_request_context_->sdch_manager()->BlacklistDomain(
          url_);
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
           static_cast<int>(filter_context_.GetByteReadCount()));
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
    }
  }

  if (!dest_buffer_excess_.empty()) {
    // Filter chaining error, or premature teardown: decoded bytes were left
    // behind that no caller ever drained via ReadFilteredData().
    SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
                         static_cast<int>(filter_context_.GetByteReadCount()));
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
                         dest_buffer_excess_.size());
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
  }

  if (filter_context_.IsCachedContent()) {
    // Not a real error, but it is useful to have this tally.
    // TODO(jar): Remove this stat after SDCH stability is validated.
    SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
    return;  // We don't need timing stats, and we already got ratios.
  }

  // Record a final per-status tally; every case returns.
  switch (decoding_status_) {
    case DECODING_IN_PROGRESS: {
      if (output_bytes_)
        UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
            static_cast<int>(
                (filter_context_.GetByteReadCount() * 100) / output_bytes_));
      UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
                           output_bytes_);
      filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);

      // Allow latency experiments to proceed.
      url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
          url_, true);
      return;
    }
    case PASS_THROUGH: {
      filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
      return;
    }
    case DECODING_UNINITIALIZED: {
      SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
      return;
    }
    case WAITING_FOR_DICTIONARY_SELECTION: {
      SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
      return;
    }
    case DECODING_ERROR: {
      SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
      return;
    }
    case META_REFRESH_RECOVERY: {
      // Already accounted for when set.
      return;
    }
  }  // end of switch.
}

// Moves the filter out of DECODING_UNINITIALIZED. Returns false if decoding
// was already initialized. Records whether SDCH was merely guessed
// (FILTER_TYPE_SDCH_POSSIBLE) so a later dictionary failure can gracefully
// pass the content through instead of treating it as an error.
bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
  if (decoding_status_ != DECODING_UNINITIALIZED)
    return false;

  // Handle case where sdch filter is guessed, but not required.
  if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
    possible_pass_through_ = true;

  // Initialize decoder only after we have a dictionary in hand.
  decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
  return true;
}

// HTML emitted in META_REFRESH_RECOVERY: a meta-refresh that reloads the page
// (with SDCH disabled via the blacklist set at the recovery site). The debug
// build adds a visible banner so developers notice the recovery happening.
#ifndef NDEBUG
static const char* kDecompressionErrorHtml =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    "font-size:10pt;foreground-color:black;background-color:white\">"
    "An error occurred. This page will be reloaded shortly. "
    "Or press the \"reload\" button now to reload it immediately."
    "</div>";
#else
static const char* kDecompressionErrorHtml =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif

// Decodes buffered input (next_stream_data_/stream_data_len_) into
// dest_buffer. On entry *dest_len is the available space; on exit it is the
// number of bytes written. The first call(s) consume the leading dictionary
// hash via InitializeDictionary(); on dictionary failure this is also where
// all of the error recovery (pass-through vs. meta-refresh) is decided.
Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
                                                  int* dest_len) {
  int available_space = *dest_len;
  *dest_len = 0;  // Nothing output yet.

  if (!dest_buffer || available_space <= 0)
    return FILTER_ERROR;

  if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
    FilterStatus status = InitializeDictionary();
    if (FILTER_NEED_MORE_DATA == status)
      return FILTER_NEED_MORE_DATA;
    if (FILTER_ERROR == status) {
      DCHECK_EQ(DECODING_ERROR, decoding_status_);
      DCHECK_EQ(0u, dest_buffer_excess_index_);
      DCHECK(dest_buffer_excess_.empty());
      // This is where we try very hard to do error recovery, and make this
      // protocol robust in the face of proxies that do many different things.
      // If we decide that things are looking very bad (too hard to recover),
      // we may even issue a "meta-refresh" to reload the page without an SDCH
      // advertisement (so that we are sure we're not hurting anything).
      //
      // Watch out for an error page inserted by the proxy as part of a 40x
      // error response. When we see such content molestation, we certainly
      // need to fall into the meta-refresh case.
      if (filter_context_.GetResponseCode() == 404) {
        // We could be more generous, but for now, only a "NOT FOUND" code will
        // cause a pass through. All other bad codes will fall into a
        // meta-refresh.
        SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
        decoding_status_ = PASS_THROUGH;
      } else if (filter_context_.GetResponseCode() != 200) {
        // We need to meta-refresh, with SDCH disabled.
      } else if (filter_context_.IsCachedContent()
                 && !dictionary_hash_is_plausible_) {
        // We must have hit the back button, and gotten content that was fetched
        // before we *really* advertised SDCH and a dictionary.
        SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
        decoding_status_ = PASS_THROUGH;
      } else if (possible_pass_through_) {
        // This is the potentially most graceful response. There really was no
        // error. We were just overly cautious when we added a TENTATIVE_SDCH.
        // We added the sdch coding tag, and it should not have been added.
        // This can happen in server experiments, where the server decides
        // not to use sdch, even though there is a dictionary. To be
        // conservative, we locally added the tentative sdch (fearing that a
        // proxy stripped it!) and we must now recant (pass through).
        SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
        // However.... just to be sure we don't get burned by proxies that
        // re-compress with gzip or other system, we can sniff to see if this
        // is compressed data etc. For now, we do nothing, which gets us into
        // the meta-refresh result.
        // TODO(jar): Improve robustness by sniffing for valid text that we can
        // actually use re: decoding_status_ = PASS_THROUGH;
      } else if (dictionary_hash_is_plausible_) {
        // We need a meta-refresh since we don't have the dictionary.
        // The common cause is a restart of the browser, where we try to render
        // cached content that was saved when we had a dictionary.
      } else if (filter_context_.IsSdchResponse()) {
        // This is a very corrupt SDCH request response. We can't decode it.
        // We'll use a meta-refresh, and get content without asking for SDCH.
        // This will also progressively disable SDCH for this domain.
      } else {
        // One of the first 9 bytes precluded consideration as a hash.
        // This can't be an SDCH payload, even though the server said it was.
        // This is a major error, as the server or proxy tagged this SDCH even
        // though it is not!
        // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
        // Worse yet, meta-refresh could lead to an infinite refresh loop.
        SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
        decoding_status_ = PASS_THROUGH;
        // ... but further back-off on advertising SDCH support.
        url_request_context_->sdch_manager()->BlacklistDomain(url_);
      }

      if (decoding_status_ == PASS_THROUGH) {
        dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
      } else {
        // This is where we try to do the expensive meta-refresh.
        if (std::string::npos == mime_type_.find("text/html")) {
          // Since we can't do a meta-refresh (along with an exponential
          // backoff), we'll just make sure this NEVER happens again.
          url_request_context_->sdch_manager()->BlacklistDomainForever(url_);
          if (filter_context_.IsCachedContent())
            SdchManager::SdchErrorRecovery(
                SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
          else
            SdchManager::SdchErrorRecovery(
                SdchManager::META_REFRESH_UNSUPPORTED);
          return FILTER_ERROR;
        }
        // HTML content means we can issue a meta-refresh, and get the content
        // again, perhaps without SDCH (to be safe).
        if (filter_context_.IsCachedContent()) {
          // Cached content is probably a startup tab, so we'll just get fresh
          // content and try again, without disabling sdch.
          SdchManager::SdchErrorRecovery(
              SdchManager::META_REFRESH_CACHED_RECOVERY);
        } else {
          // Since it wasn't in the cache, we definitely need at least some
          // period of blacklisting to get the correct content.
          url_request_context_->sdch_manager()->BlacklistDomain(url_);
          SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
        }
        decoding_status_ = META_REFRESH_RECOVERY;
        // Issue a meta redirect with SDCH disabled.
        dest_buffer_excess_ = kDecompressionErrorHtml;
      }
    } else {
      DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
    }
  }

  // First drain any previously-decoded bytes that didn't fit last time.
  int amount = OutputBufferExcess(dest_buffer, available_space);
  *dest_len += amount;
  dest_buffer += amount;
  available_space -= amount;
  DCHECK_GE(available_space, 0);

  if (available_space <= 0)
    return FILTER_OK;
  DCHECK(dest_buffer_excess_.empty());
  DCHECK_EQ(0u, dest_buffer_excess_index_);

  if (decoding_status_ != DECODING_IN_PROGRESS) {
    if (META_REFRESH_RECOVERY == decoding_status_) {
      // Absorb all input data. We've already output page reload HTML.
      next_stream_data_ = NULL;
      stream_data_len_ = 0;
      return FILTER_NEED_MORE_DATA;
    }
    if (PASS_THROUGH == decoding_status_) {
      // We must pass in available_space, but it will be changed to bytes_used.
      FilterStatus result = CopyOut(dest_buffer, &available_space);
      // Accumulate the returned count of bytes_used (a.k.a., available_space).
      *dest_len += available_space;
      return result;
    }
    DCHECK(false);
    decoding_status_ = DECODING_ERROR;
    return FILTER_ERROR;
  }

  if (!next_stream_data_ || stream_data_len_ <= 0)
    return FILTER_NEED_MORE_DATA;

  bool ret = vcdiff_streaming_decoder_->DecodeChunk(
    next_stream_data_, stream_data_len_, &dest_buffer_excess_);
  // Assume all data was used in decoding.
  next_stream_data_ = NULL;
  source_bytes_ += stream_data_len_;
  stream_data_len_ = 0;
  output_bytes_ += dest_buffer_excess_.size();
  if (!ret) {
    vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
    decoding_status_ = DECODING_ERROR;
    SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
    return FILTER_ERROR;
  }

  amount = OutputBufferExcess(dest_buffer, available_space);
  *dest_len += amount;
  dest_buffer += amount;
  available_space -= amount;
  // NOTE(review): FILTER_OK is returned only when the destination filled AND
  // excess remains; an exactly-full buffer with no excess asks for more data.
  if (0 == available_space && !dest_buffer_excess_.empty())
    return FILTER_OK;
  return FILTER_NEED_MORE_DATA;
}

// Accumulates the 9-byte server ID (8 base64url hash chars + '\0') from the
// front of the stream, then looks up the corresponding VCDIFF dictionary and
// starts the streaming decoder. Returns FILTER_NEED_MORE_DATA until the full
// ID has arrived, FILTER_ERROR (with decoding_status_ = DECODING_ERROR) when
// no dictionary matches, and FILTER_OK once decoding is ready.
Filter::FilterStatus SdchFilter::InitializeDictionary() {
  const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
  size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
  DCHECK_GT(bytes_needed, 0u);
  if (!next_stream_data_)
    return FILTER_NEED_MORE_DATA;
  if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    // Partial ID: stash what we have and wait for the next chunk.
    dictionary_hash_.append(next_stream_data_, stream_data_len_);
    next_stream_data_ = NULL;
    stream_data_len_ = 0;
    return FILTER_NEED_MORE_DATA;
  }
  dictionary_hash_.append(next_stream_data_, bytes_needed);
  DCHECK(kServerIdLength == dictionary_hash_.size());
  stream_data_len_ -= bytes_needed;
  DCHECK_LE(0, stream_data_len_);
  if (stream_data_len_ > 0)
    next_stream_data_ += bytes_needed;
  else
    next_stream_data_ = NULL;

  DCHECK(!dictionary_);
  dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.

  // A valid server ID is NUL-terminated; only then is a lookup attempted.
  if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
    SdchManager* manager(url_request_context_->sdch_manager());
    manager->GetVcdiffDictionary(
        std::string(dictionary_hash_, 0, kServerIdLength - 1),
        url_, &dictionary_);
  } else {
    dictionary_hash_is_plausible_ = false;
  }

  if (!dictionary_) {
    DCHECK(dictionary_hash_.size() == kServerIdLength);
    // Since dictionary was not found, check to see if hash was even plausible.
    // Plausible == every hash char is in the base64url alphabet [A-Za-z0-9-_].
    // NOTE(review): isalnum() on a plain (possibly signed, negative) char is
    // technically undefined for bytes >= 0x80 — confirm upstream sanitization.
    for (size_t i = 0; i < kServerIdLength - 1; ++i) {
      char base64_char = dictionary_hash_[i];
      if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
        dictionary_hash_is_plausible_ = false;
        break;
      }
    }
    if (dictionary_hash_is_plausible_)
      SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
    else
      SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
    decoding_status_ = DECODING_ERROR;
    return FILTER_ERROR;
  }
  vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
  // Disallow VCD_TARGET so the decoder can't reference its own output window.
  vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
  vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
                                           dictionary_->text().size());
  decoding_status_ = DECODING_IN_PROGRESS;
  return FILTER_OK;
}

// Copies up to available_space bytes of pending decoded output
// (dest_buffer_excess_, starting at dest_buffer_excess_index_) into
// dest_buffer. Returns the number of bytes copied; clears the excess buffer
// and resets the index once fully drained.
int SdchFilter::OutputBufferExcess(char* const dest_buffer,
                                   size_t available_space) {
  if (dest_buffer_excess_.empty())
    return 0;
  DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
  size_t amount = std::min(available_space,
      dest_buffer_excess_.size() - dest_buffer_excess_index_);
  memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
         amount);
  dest_buffer_excess_index_ += amount;
  if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
    DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    dest_buffer_excess_.clear();
    dest_buffer_excess_index_ = 0;
  }
  return amount;
}

}  // namespace net