summaryrefslogtreecommitdiffstats
path: root/chromium/v8/src/scanner.h
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/v8/src/scanner.h')
-rw-r--r--chromium/v8/src/scanner.h258
1 files changed, 136 insertions, 122 deletions
diff --git a/chromium/v8/src/scanner.h b/chromium/v8/src/scanner.h
index 3cefc833ac3..2979082e3f1 100644
--- a/chromium/v8/src/scanner.h
+++ b/chromium/v8/src/scanner.h
@@ -1,49 +1,29 @@
// Copyright 2011 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
// Features shared by parsing and pre-parsing scanners.
#ifndef V8_SCANNER_H_
#define V8_SCANNER_H_
-#include "allocation.h"
-#include "char-predicates.h"
-#include "checks.h"
-#include "globals.h"
-#include "hashmap.h"
-#include "list.h"
-#include "token.h"
-#include "unicode-inl.h"
-#include "utils.h"
+#include "src/allocation.h"
+#include "src/char-predicates.h"
+#include "src/checks.h"
+#include "src/globals.h"
+#include "src/hashmap.h"
+#include "src/list.h"
+#include "src/token.h"
+#include "src/unicode-inl.h"
+#include "src/utils.h"
namespace v8 {
namespace internal {
+class ParserRecorder;
+
+
// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
inline int HexValue(uc32 c) {
@@ -117,8 +97,8 @@ class Utf16CharacterStream {
virtual bool ReadBlock() = 0;
virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
- const uc16* buffer_cursor_;
- const uc16* buffer_end_;
+ const uint16_t* buffer_cursor_;
+ const uint16_t* buffer_end_;
unsigned pos_;
};
@@ -139,12 +119,17 @@ class UnicodeCache {
bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+ bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) {
+ return kIsWhiteSpaceOrLineTerminator.get(c);
+ }
private:
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
- unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
+ unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
+ unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
+ kIsWhiteSpaceOrLineTerminator;
StaticResource<Utf8Decoder> utf8_decoder_;
DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
@@ -161,32 +146,32 @@ class DuplicateFinder {
backing_store_(16),
map_(&Match) { }
- int AddAsciiSymbol(Vector<const char> key, int value);
- int AddUtf16Symbol(Vector<const uint16_t> key, int value);
+ int AddOneByteSymbol(Vector<const uint8_t> key, int value);
+ int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
// Add a a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate.
// and then adding that string with AddAsciiSymbol.
// This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys.
- int AddNumber(Vector<const char> key, int value);
+ int AddNumber(Vector<const uint8_t> key, int value);
private:
- int AddSymbol(Vector<const byte> key, bool is_ascii, int value);
+ int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base 127 encoding of the
- // length (plus a bit saying whether the string is ASCII),
+ // length (plus a bit saying whether the string is one byte),
// followed by the bytes of the key.
- byte* BackupKey(Vector<const byte> key, bool is_ascii);
+ uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-127 encoded lengths and ASCII-ness,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
- static uint32_t Hash(Vector<const byte> key, bool is_ascii);
+ static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
// Checks whether a string containing a JS number is its canonical
// form.
- static bool IsNumberCanonical(Vector<const char> key);
+ static bool IsNumberCanonical(Vector<const uint8_t> key);
// Size of buffer. Sufficient for using it to call DoubleToCString in
// from conversions.h.
@@ -206,7 +191,7 @@ class DuplicateFinder {
class LiteralBuffer {
public:
- LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
+ LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
~LiteralBuffer() {
if (backing_store_.length() > 0) {
@@ -216,48 +201,48 @@ class LiteralBuffer {
INLINE(void AddChar(uint32_t code_unit)) {
if (position_ >= backing_store_.length()) ExpandBuffer();
- if (is_ascii_) {
+ if (is_one_byte_) {
if (code_unit <= unibrow::Latin1::kMaxChar) {
backing_store_[position_] = static_cast<byte>(code_unit);
position_ += kOneByteSize;
return;
}
- ConvertToUtf16();
+ ConvertToTwoByte();
}
ASSERT(code_unit < 0x10000u);
- *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
+ *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
position_ += kUC16Size;
}
- bool is_ascii() { return is_ascii_; }
+ bool is_one_byte() { return is_one_byte_; }
bool is_contextual_keyword(Vector<const char> keyword) {
- return is_ascii() && keyword.length() == position_ &&
+ return is_one_byte() && keyword.length() == position_ &&
(memcmp(keyword.start(), backing_store_.start(), position_) == 0);
}
- Vector<const uc16> utf16_literal() {
- ASSERT(!is_ascii_);
+ Vector<const uint16_t> two_byte_literal() {
+ ASSERT(!is_one_byte_);
ASSERT((position_ & 0x1) == 0);
- return Vector<const uc16>(
- reinterpret_cast<const uc16*>(backing_store_.start()),
+ return Vector<const uint16_t>(
+ reinterpret_cast<const uint16_t*>(backing_store_.start()),
position_ >> 1);
}
- Vector<const char> ascii_literal() {
- ASSERT(is_ascii_);
- return Vector<const char>(
- reinterpret_cast<const char*>(backing_store_.start()),
+ Vector<const uint8_t> one_byte_literal() {
+ ASSERT(is_one_byte_);
+ return Vector<const uint8_t>(
+ reinterpret_cast<const uint8_t*>(backing_store_.start()),
position_);
}
int length() {
- return is_ascii_ ? position_ : (position_ >> 1);
+ return is_one_byte_ ? position_ : (position_ >> 1);
}
void Reset() {
position_ = 0;
- is_ascii_ = true;
+ is_one_byte_ = true;
}
private:
@@ -273,13 +258,13 @@ class LiteralBuffer {
void ExpandBuffer() {
Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
- OS::MemCopy(new_store.start(), backing_store_.start(), position_);
+ MemCopy(new_store.start(), backing_store_.start(), position_);
backing_store_.Dispose();
backing_store_ = new_store;
}
- void ConvertToUtf16() {
- ASSERT(is_ascii_);
+ void ConvertToTwoByte() {
+ ASSERT(is_one_byte_);
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
@@ -290,7 +275,7 @@ class LiteralBuffer {
new_store = backing_store_;
}
uint8_t* src = backing_store_.start();
- uc16* dst = reinterpret_cast<uc16*>(new_store.start());
+ uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i];
}
@@ -299,10 +284,10 @@ class LiteralBuffer {
backing_store_ = new_store;
}
position_ = new_content_size;
- is_ascii_ = false;
+ is_one_byte_ = false;
}
- bool is_ascii_;
+ bool is_one_byte_;
int position_;
Vector<byte> backing_store_;
@@ -365,32 +350,13 @@ class Scanner {
// Returns the location information for the current token
// (the token last returned by Next()).
Location location() const { return current_.location; }
- // Returns the literal string, if any, for the current token (the
- // token last returned by Next()). The string is 0-terminated.
- // Literal strings are collected for identifiers, strings, and
- // numbers.
- // These functions only give the correct result if the literal
- // was scanned between calls to StartLiteral() and TerminateLiteral().
- Vector<const char> literal_ascii_string() {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->ascii_literal();
- }
- Vector<const uc16> literal_utf16_string() {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->utf16_literal();
- }
- bool is_literal_ascii() {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->is_ascii();
- }
- bool is_literal_contextual_keyword(Vector<const char> keyword) {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->is_contextual_keyword(keyword);
- }
- int literal_length() const {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->length();
- }
+
+ // Similar functions for the upcoming token.
+
+ // One token look-ahead (past the token returned by Next()).
+ Token::Value peek() const { return next_.token; }
+
+ Location peek_location() const { return next_.location; }
bool literal_contains_escapes() const {
Location location = current_.location;
@@ -401,43 +367,45 @@ class Scanner {
}
return current_.literal_chars->length() != source_length;
}
-
- // Similar functions for the upcoming token.
-
- // One token look-ahead (past the token returned by Next()).
- Token::Value peek() const { return next_.token; }
-
- Location peek_location() const { return next_.location; }
-
- // Returns the literal string for the next token (the token that
- // would be returned if Next() were called).
- Vector<const char> next_literal_ascii_string() {
- ASSERT_NOT_NULL(next_.literal_chars);
- return next_.literal_chars->ascii_literal();
- }
- Vector<const uc16> next_literal_utf16_string() {
- ASSERT_NOT_NULL(next_.literal_chars);
- return next_.literal_chars->utf16_literal();
- }
- bool is_next_literal_ascii() {
- ASSERT_NOT_NULL(next_.literal_chars);
- return next_.literal_chars->is_ascii();
+ bool is_literal_contextual_keyword(Vector<const char> keyword) {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->is_contextual_keyword(keyword);
}
bool is_next_contextual_keyword(Vector<const char> keyword) {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->is_contextual_keyword(keyword);
}
- int next_literal_length() const {
- ASSERT_NOT_NULL(next_.literal_chars);
- return next_.literal_chars->length();
- }
- UnicodeCache* unicode_cache() { return unicode_cache_; }
+ Handle<String> AllocateNextLiteralString(Isolate* isolate,
+ PretenureFlag tenured);
+ Handle<String> AllocateInternalizedString(Isolate* isolate);
- static const int kCharacterLookaheadBufferSize = 1;
+ double DoubleValue();
+ bool UnescapedLiteralMatches(const char* data, int length) {
+ if (is_literal_one_byte() &&
+ literal_length() == length &&
+ !literal_contains_escapes()) {
+ const char* token =
+ reinterpret_cast<const char*>(literal_one_byte_string().start());
+ return !strncmp(token, data, length);
+ }
+ return false;
+ }
+ void IsGetOrSet(bool* is_get, bool* is_set) {
+ if (is_literal_one_byte() &&
+ literal_length() == 3 &&
+ !literal_contains_escapes()) {
+ const char* token =
+ reinterpret_cast<const char*>(literal_one_byte_string().start());
+ *is_get = strncmp(token, "get", 3) == 0;
+ *is_set = !*is_get && strncmp(token, "set", 3) == 0;
+ }
+ }
- // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
- uc32 ScanOctalEscape(uc32 c, int length);
+ int FindNumber(DuplicateFinder* finder, int value);
+ int FindSymbol(DuplicateFinder* finder, int value);
+
+ UnicodeCache* unicode_cache() { return unicode_cache_; }
// Returns the location of the last seen octal literal.
Location octal_position() const { return octal_pos_; }
@@ -490,6 +458,11 @@ class Scanner {
LiteralBuffer* literal_chars;
};
+ static const int kCharacterLookaheadBufferSize = 1;
+
+ // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+ uc32 ScanOctalEscape(uc32 c, int length);
+
// Call this after setting source_ to the input.
void Init() {
// Set c0_ (one character ahead)
@@ -550,6 +523,47 @@ class Scanner {
}
}
+ // Returns the literal string, if any, for the current token (the
+ // token last returned by Next()). The string is 0-terminated.
+ // Literal strings are collected for identifiers, strings, and
+ // numbers.
+ // These functions only give the correct result if the literal
+ // was scanned between calls to StartLiteral() and TerminateLiteral().
+ Vector<const uint8_t> literal_one_byte_string() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->one_byte_literal();
+ }
+ Vector<const uint16_t> literal_two_byte_string() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->two_byte_literal();
+ }
+ bool is_literal_one_byte() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->is_one_byte();
+ }
+ int literal_length() const {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->length();
+ }
+ // Returns the literal string for the next token (the token that
+ // would be returned if Next() were called).
+ Vector<const uint8_t> next_literal_one_byte_string() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->one_byte_literal();
+ }
+ Vector<const uint16_t> next_literal_two_byte_string() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->two_byte_literal();
+ }
+ bool is_next_literal_one_byte() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->is_one_byte();
+ }
+ int next_literal_length() const {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->length();
+ }
+
uc32 ScanHexNumber(int expected_length);
// Scans a single JavaScript token.