summaryrefslogtreecommitdiffstats
path: root/chromium/components/url_matcher/substring_set_matcher.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/url_matcher/substring_set_matcher.cc')
-rw-r--r--chromium/components/url_matcher/substring_set_matcher.cc272
1 files changed, 0 insertions, 272 deletions
diff --git a/chromium/components/url_matcher/substring_set_matcher.cc b/chromium/components/url_matcher/substring_set_matcher.cc
deleted file mode 100644
index 9669788152c..00000000000
--- a/chromium/components/url_matcher/substring_set_matcher.cc
+++ /dev/null
@@ -1,272 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_matcher/substring_set_matcher.h"
-
-#include <algorithm>
-#include <queue>
-
-#include "base/logging.h"
-#include "base/stl_util.h"
-
-namespace url_matcher {
-
-namespace {
-
-// Compare StringPattern instances based on their string patterns.
-bool ComparePatterns(const StringPattern* a, const StringPattern* b) {
- return a->pattern() < b->pattern();
-}
-
-// Given the set of patterns, compute how many nodes will the corresponding
-// Aho-Corasick tree have. Note that |patterns| need to be sorted.
-uint32 TreeSize(const std::vector<const StringPattern*>& patterns) {
- uint32 result = 1u; // 1 for the root node.
- if (patterns.empty())
- return result;
-
- std::vector<const StringPattern*>::const_iterator last = patterns.begin();
- std::vector<const StringPattern*>::const_iterator current = last + 1;
- // For the first pattern, each letter is a label of an edge to a new node.
- result += (*last)->pattern().size();
-
- // For the subsequent patterns, only count the edges which were not counted
- // yet. For this it suffices to test against the previous pattern, because the
- // patterns are sorted.
- for (; current != patterns.end(); ++last, ++current) {
- const std::string& last_pattern = (*last)->pattern();
- const std::string& current_pattern = (*current)->pattern();
- const uint32 prefix_bound =
- std::min(last_pattern.size(), current_pattern.size());
-
- uint32 common_prefix = 0;
- while (common_prefix < prefix_bound &&
- last_pattern[common_prefix] == current_pattern[common_prefix])
- ++common_prefix;
- result += current_pattern.size() - common_prefix;
- }
- return result;
-}
-
-} // namespace
-
-//
-// SubstringSetMatcher
-//
-
-SubstringSetMatcher::SubstringSetMatcher() {
- RebuildAhoCorasickTree(SubstringPatternVector());
-}
-
-SubstringSetMatcher::~SubstringSetMatcher() {}
-
-void SubstringSetMatcher::RegisterPatterns(
- const std::vector<const StringPattern*>& patterns) {
- RegisterAndUnregisterPatterns(patterns,
- std::vector<const StringPattern*>());
-}
-
-void SubstringSetMatcher::UnregisterPatterns(
- const std::vector<const StringPattern*>& patterns) {
- RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(),
- patterns);
-}
-
-void SubstringSetMatcher::RegisterAndUnregisterPatterns(
- const std::vector<const StringPattern*>& to_register,
- const std::vector<const StringPattern*>& to_unregister) {
- // Register patterns.
- for (std::vector<const StringPattern*>::const_iterator i =
- to_register.begin(); i != to_register.end(); ++i) {
- DCHECK(patterns_.find((*i)->id()) == patterns_.end());
- patterns_[(*i)->id()] = *i;
- }
-
- // Unregister patterns
- for (std::vector<const StringPattern*>::const_iterator i =
- to_unregister.begin(); i != to_unregister.end(); ++i) {
- patterns_.erase((*i)->id());
- }
-
- // Now we compute the total number of tree nodes needed.
- SubstringPatternVector sorted_patterns;
- sorted_patterns.resize(patterns_.size());
-
- size_t next = 0;
- for (SubstringPatternMap::const_iterator i = patterns_.begin();
- i != patterns_.end();
- ++i, ++next) {
- sorted_patterns[next] = i->second;
- }
-
- std::sort(sorted_patterns.begin(), sorted_patterns.end(), ComparePatterns);
- tree_.reserve(TreeSize(sorted_patterns));
-
- RebuildAhoCorasickTree(sorted_patterns);
-}
-
-bool SubstringSetMatcher::Match(const std::string& text,
- std::set<StringPattern::ID>* matches) const {
- const size_t old_number_of_matches = matches->size();
-
- // Handle patterns matching the empty string.
- matches->insert(tree_[0].matches().begin(), tree_[0].matches().end());
-
- uint32 current_node = 0;
- for (std::string::const_iterator i = text.begin(); i != text.end(); ++i) {
- uint32 edge_from_current = tree_[current_node].GetEdge(*i);
- while (edge_from_current == AhoCorasickNode::kNoSuchEdge &&
- current_node != 0) {
- current_node = tree_[current_node].failure();
- edge_from_current = tree_[current_node].GetEdge(*i);
- }
- if (edge_from_current != AhoCorasickNode::kNoSuchEdge) {
- current_node = edge_from_current;
- matches->insert(tree_[current_node].matches().begin(),
- tree_[current_node].matches().end());
- } else {
- DCHECK_EQ(0u, current_node);
- }
- }
-
- return old_number_of_matches != matches->size();
-}
-
-bool SubstringSetMatcher::IsEmpty() const {
- // An empty tree consists of only the root node.
- return patterns_.empty() && tree_.size() == 1u;
-}
-
-void SubstringSetMatcher::RebuildAhoCorasickTree(
- const SubstringPatternVector& sorted_patterns) {
- tree_.clear();
-
- // Initialize root note of tree.
- AhoCorasickNode root;
- root.set_failure(0);
- tree_.push_back(root);
-
- // Insert all patterns.
- for (SubstringPatternVector::const_iterator i = sorted_patterns.begin();
- i != sorted_patterns.end();
- ++i) {
- InsertPatternIntoAhoCorasickTree(*i);
- }
-
- CreateFailureEdges();
-}
-
-void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree(
- const StringPattern* pattern) {
- const std::string& text = pattern->pattern();
- const std::string::const_iterator text_end = text.end();
-
- // Iterators on the tree and the text.
- uint32 current_node = 0;
- std::string::const_iterator i = text.begin();
-
- // Follow existing paths for as long as possible.
- while (i != text_end) {
- uint32 edge_from_current = tree_[current_node].GetEdge(*i);
- if (edge_from_current == AhoCorasickNode::kNoSuchEdge)
- break;
- current_node = edge_from_current;
- ++i;
- }
-
- // Create new nodes if necessary.
- while (i != text_end) {
- tree_.push_back(AhoCorasickNode());
- tree_[current_node].SetEdge(*i, tree_.size() - 1);
- current_node = tree_.size() - 1;
- ++i;
- }
-
- // Register match.
- tree_[current_node].AddMatch(pattern->id());
-}
-
-void SubstringSetMatcher::CreateFailureEdges() {
- typedef AhoCorasickNode::Edges Edges;
-
- std::queue<uint32> queue;
-
- AhoCorasickNode& root = tree_[0];
- root.set_failure(0);
- const Edges& root_edges = root.edges();
- for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end();
- ++e) {
- const uint32& leads_to = e->second;
- tree_[leads_to].set_failure(0);
- queue.push(leads_to);
- }
-
- while (!queue.empty()) {
- AhoCorasickNode& current_node = tree_[queue.front()];
- queue.pop();
- for (Edges::const_iterator e = current_node.edges().begin();
- e != current_node.edges().end(); ++e) {
- const char& edge_label = e->first;
- const uint32& leads_to = e->second;
- queue.push(leads_to);
-
- uint32 failure = current_node.failure();
- uint32 edge_from_failure = tree_[failure].GetEdge(edge_label);
- while (edge_from_failure == AhoCorasickNode::kNoSuchEdge &&
- failure != 0) {
- failure = tree_[failure].failure();
- edge_from_failure = tree_[failure].GetEdge(edge_label);
- }
-
- const uint32 follow_in_case_of_failure =
- edge_from_failure != AhoCorasickNode::kNoSuchEdge
- ? edge_from_failure
- : 0;
- tree_[leads_to].set_failure(follow_in_case_of_failure);
- tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches());
- }
- }
-}
-
-const uint32 SubstringSetMatcher::AhoCorasickNode::kNoSuchEdge = ~0;
-
-SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode()
- : failure_(kNoSuchEdge) {}
-
-SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {}
-
-SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode(
- const SubstringSetMatcher::AhoCorasickNode& other)
- : edges_(other.edges_),
- failure_(other.failure_),
- matches_(other.matches_) {}
-
-SubstringSetMatcher::AhoCorasickNode&
-SubstringSetMatcher::AhoCorasickNode::operator=(
- const SubstringSetMatcher::AhoCorasickNode& other) {
- edges_ = other.edges_;
- failure_ = other.failure_;
- matches_ = other.matches_;
- return *this;
-}
-
-uint32 SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const {
- Edges::const_iterator i = edges_.find(c);
- return i == edges_.end() ? kNoSuchEdge : i->second;
-}
-
-void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, uint32 node) {
- edges_[c] = node;
-}
-
-void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) {
- matches_.insert(id);
-}
-
-void SubstringSetMatcher::AhoCorasickNode::AddMatches(
- const SubstringSetMatcher::AhoCorasickNode::Matches& matches) {
- matches_.insert(matches.begin(), matches.end());
-}
-
-} // namespace url_matcher