diff options
Diffstat (limited to 'chromium/components/url_matcher/substring_set_matcher.cc')
-rw-r--r-- | chromium/components/url_matcher/substring_set_matcher.cc | 272 |
1 files changed, 0 insertions, 272 deletions
diff --git a/chromium/components/url_matcher/substring_set_matcher.cc b/chromium/components/url_matcher/substring_set_matcher.cc deleted file mode 100644 index 9669788152c..00000000000 --- a/chromium/components/url_matcher/substring_set_matcher.cc +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/url_matcher/substring_set_matcher.h" - -#include <algorithm> -#include <queue> - -#include "base/logging.h" -#include "base/stl_util.h" - -namespace url_matcher { - -namespace { - -// Compare StringPattern instances based on their string patterns. -bool ComparePatterns(const StringPattern* a, const StringPattern* b) { - return a->pattern() < b->pattern(); -} - -// Given the set of patterns, compute how many nodes will the corresponding -// Aho-Corasick tree have. Note that |patterns| need to be sorted. -uint32 TreeSize(const std::vector<const StringPattern*>& patterns) { - uint32 result = 1u; // 1 for the root node. - if (patterns.empty()) - return result; - - std::vector<const StringPattern*>::const_iterator last = patterns.begin(); - std::vector<const StringPattern*>::const_iterator current = last + 1; - // For the first pattern, each letter is a label of an edge to a new node. - result += (*last)->pattern().size(); - - // For the subsequent patterns, only count the edges which were not counted - // yet. For this it suffices to test against the previous pattern, because the - // patterns are sorted. - for (; current != patterns.end(); ++last, ++current) { - const std::string& last_pattern = (*last)->pattern(); - const std::string& current_pattern = (*current)->pattern(); - const uint32 prefix_bound = - std::min(last_pattern.size(), current_pattern.size()); - - uint32 common_prefix = 0; - while (common_prefix < prefix_bound && - last_pattern[common_prefix] == current_pattern[common_prefix]) - ++common_prefix; - result += current_pattern.size() - common_prefix; - } - return result; -} - -} // namespace - -// -// SubstringSetMatcher -// - -SubstringSetMatcher::SubstringSetMatcher() { - RebuildAhoCorasickTree(SubstringPatternVector()); -} - -SubstringSetMatcher::~SubstringSetMatcher() {} - -void SubstringSetMatcher::RegisterPatterns( - const std::vector<const StringPattern*>& patterns) { - RegisterAndUnregisterPatterns(patterns, - std::vector<const StringPattern*>()); -} - -void SubstringSetMatcher::UnregisterPatterns( - const std::vector<const StringPattern*>& patterns) { - RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(), - patterns); -} - -void SubstringSetMatcher::RegisterAndUnregisterPatterns( - const std::vector<const StringPattern*>& to_register, - const std::vector<const StringPattern*>& to_unregister) { - // Register patterns. - for (std::vector<const StringPattern*>::const_iterator i = - to_register.begin(); i != to_register.end(); ++i) { - DCHECK(patterns_.find((*i)->id()) == patterns_.end()); - patterns_[(*i)->id()] = *i; - } - - // Unregister patterns - for (std::vector<const StringPattern*>::const_iterator i = - to_unregister.begin(); i != to_unregister.end(); ++i) { - patterns_.erase((*i)->id()); - } - - // Now we compute the total number of tree nodes needed. - SubstringPatternVector sorted_patterns; - sorted_patterns.resize(patterns_.size()); - - size_t next = 0; - for (SubstringPatternMap::const_iterator i = patterns_.begin(); - i != patterns_.end(); - ++i, ++next) { - sorted_patterns[next] = i->second; - } - - std::sort(sorted_patterns.begin(), sorted_patterns.end(), ComparePatterns); - tree_.reserve(TreeSize(sorted_patterns)); - - RebuildAhoCorasickTree(sorted_patterns); -} - -bool SubstringSetMatcher::Match(const std::string& text, - std::set<StringPattern::ID>* matches) const { - const size_t old_number_of_matches = matches->size(); - - // Handle patterns matching the empty string. - matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); - - uint32 current_node = 0; - for (std::string::const_iterator i = text.begin(); i != text.end(); ++i) { - uint32 edge_from_current = tree_[current_node].GetEdge(*i); - while (edge_from_current == AhoCorasickNode::kNoSuchEdge && - current_node != 0) { - current_node = tree_[current_node].failure(); - edge_from_current = tree_[current_node].GetEdge(*i); - } - if (edge_from_current != AhoCorasickNode::kNoSuchEdge) { - current_node = edge_from_current; - matches->insert(tree_[current_node].matches().begin(), - tree_[current_node].matches().end()); - } else { - DCHECK_EQ(0u, current_node); - } - } - - return old_number_of_matches != matches->size(); -} - -bool SubstringSetMatcher::IsEmpty() const { - // An empty tree consists of only the root node. - return patterns_.empty() && tree_.size() == 1u; -} - -void SubstringSetMatcher::RebuildAhoCorasickTree( - const SubstringPatternVector& sorted_patterns) { - tree_.clear(); - - // Initialize root note of tree. - AhoCorasickNode root; - root.set_failure(0); - tree_.push_back(root); - - // Insert all patterns. - for (SubstringPatternVector::const_iterator i = sorted_patterns.begin(); - i != sorted_patterns.end(); - ++i) { - InsertPatternIntoAhoCorasickTree(*i); - } - - CreateFailureEdges(); -} - -void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( - const StringPattern* pattern) { - const std::string& text = pattern->pattern(); - const std::string::const_iterator text_end = text.end(); - - // Iterators on the tree and the text. - uint32 current_node = 0; - std::string::const_iterator i = text.begin(); - - // Follow existing paths for as long as possible. - while (i != text_end) { - uint32 edge_from_current = tree_[current_node].GetEdge(*i); - if (edge_from_current == AhoCorasickNode::kNoSuchEdge) - break; - current_node = edge_from_current; - ++i; - } - - // Create new nodes if necessary. - while (i != text_end) { - tree_.push_back(AhoCorasickNode()); - tree_[current_node].SetEdge(*i, tree_.size() - 1); - current_node = tree_.size() - 1; - ++i; - } - - // Register match. - tree_[current_node].AddMatch(pattern->id()); -} - -void SubstringSetMatcher::CreateFailureEdges() { - typedef AhoCorasickNode::Edges Edges; - - std::queue<uint32> queue; - - AhoCorasickNode& root = tree_[0]; - root.set_failure(0); - const Edges& root_edges = root.edges(); - for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end(); - ++e) { - const uint32& leads_to = e->second; - tree_[leads_to].set_failure(0); - queue.push(leads_to); - } - - while (!queue.empty()) { - AhoCorasickNode& current_node = tree_[queue.front()]; - queue.pop(); - for (Edges::const_iterator e = current_node.edges().begin(); - e != current_node.edges().end(); ++e) { - const char& edge_label = e->first; - const uint32& leads_to = e->second; - queue.push(leads_to); - - uint32 failure = current_node.failure(); - uint32 edge_from_failure = tree_[failure].GetEdge(edge_label); - while (edge_from_failure == AhoCorasickNode::kNoSuchEdge && - failure != 0) { - failure = tree_[failure].failure(); - edge_from_failure = tree_[failure].GetEdge(edge_label); - } - - const uint32 follow_in_case_of_failure = - edge_from_failure != AhoCorasickNode::kNoSuchEdge - ? edge_from_failure - : 0; - tree_[leads_to].set_failure(follow_in_case_of_failure); - tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches()); - } - } -} - -const uint32 SubstringSetMatcher::AhoCorasickNode::kNoSuchEdge = ~0; - -SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode() - : failure_(kNoSuchEdge) {} - -SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {} - -SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode( - const SubstringSetMatcher::AhoCorasickNode& other) - : edges_(other.edges_), - failure_(other.failure_), - matches_(other.matches_) {} - -SubstringSetMatcher::AhoCorasickNode& -SubstringSetMatcher::AhoCorasickNode::operator=( - const SubstringSetMatcher::AhoCorasickNode& other) { - edges_ = other.edges_; - failure_ = other.failure_; - matches_ = other.matches_; - return *this; -} - -uint32 SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { - Edges::const_iterator i = edges_.find(c); - return i == edges_.end() ? kNoSuchEdge : i->second; -} - -void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, uint32 node) { - edges_[c] = node; -} - -void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) { - matches_.insert(id); -} - -void SubstringSetMatcher::AhoCorasickNode::AddMatches( - const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { - matches_.insert(matches.begin(), matches.end()); -} - -} // namespace url_matcher |