From d97195f011cdaa8b859d759f8a34dd50c3092f30 Mon Sep 17 00:00:00 2001 From: Aaron Kennedy Date: Tue, 4 Oct 2011 15:04:21 +1000 Subject: [PATCH 01/13] Add hashing and comparison methods to v8::String This allows us to more rapidly search for a v8::String inside a hash of QStrings. --- include/v8.h | 45 +++++++++++++++++++++++++++++ src/api.cc | 51 +++++++++++++++++++++++++++++++++ src/heap-inl.h | 2 + src/heap.cc | 3 ++ src/objects-inl.h | 1 + src/objects.cc | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++- src/objects.h | 10 ++++++- 7 files changed, 192 insertions(+), 2 deletions(-) diff --git a/include/v8.h b/include/v8.h index 73b7fbe..86ea70f 100644 --- a/include/v8.h +++ b/include/v8.h @@ -1021,6 +1021,49 @@ class String : public Primitive { V8EXPORT int Utf8Length() const; /** + * Returns the hash of this string. + */ + V8EXPORT uint32_t Hash() const; + + struct CompleteHashData { + CompleteHashData() : length(0), hash(0), symbol_id(0) {} + int length; + uint32_t hash; + uint32_t symbol_id; + }; + + /** + * Returns the "complete" hash of the string. This is + * all the information about the string needed to implement + * a very efficient hash keyed on the string. + * + * The members of CompleteHashData are: + * length: The length of the string. Equivalent to Length() + * hash: The hash of the string. Equivalent to Hash() + * symbol_id: If the string is a sequential symbol, the symbol + * id, otherwise 0. If the symbol ids of two strings are + * the same (and non-zero) the two strings are identical. + * If the symbol ids are different the strings may still be + * identical, but an Equals() check must be performed. + */ + V8EXPORT CompleteHashData CompleteHash() const; + + /** + * Compute a hash value for the passed UTF16 string + * data. + */ + V8EXPORT static uint32_t ComputeHash(uint16_t *string, int length); + V8EXPORT static uint32_t ComputeHash(char *string, int length); + + /** + * Returns true if this string is equal to the external + * string data provided. + */ + V8EXPORT bool Equals(uint16_t *string, int length); + V8EXPORT bool Equals(char *string, int length); + inline bool Equals(Handle that) const { return v8::Value::Equals(that); } + + /** * Write the contents of the string to an external buffer. * If no arguments are given, expects the buffer to be large * enough to hold the entire string and NULL terminator. Copies @@ -1051,6 +1094,8 @@ class String : public Primitive { NO_NULL_TERMINATION = 2 }; + V8EXPORT uint16_t GetCharacter(int index); + // 16-bit character codes. V8EXPORT int Write(uint16_t* buffer, int start = 0, diff --git a/src/api.cc b/src/api.cc index ac4f07f..996812e 100644 --- a/src/api.cc +++ b/src/api.cc @@ -3633,6 +3633,57 @@ int String::Utf8Length() const { } +uint32_t String::Hash() const { + i::Handle str = Utils::OpenHandle(this); + if (IsDeadCheck(str->GetIsolate(), "v8::String::Hash()")) return 0; + return str->Hash(); +} + + +String::CompleteHashData String::CompleteHash() const { + i::Handle str = Utils::OpenHandle(this); + if (IsDeadCheck(str->GetIsolate(), "v8::String::CompleteHash()")) return CompleteHashData(); + CompleteHashData result; + result.length = str->length(); + result.hash = str->Hash(); + if (str->IsSeqString()) + result.symbol_id = i::SeqString::cast(*str)->symbol_id(); + return result; +} + + +uint32_t String::ComputeHash(uint16_t *string, int length) { + return i::HashSequentialString(string, length) >> i::String::kHashShift; +} + + +uint32_t String::ComputeHash(char *string, int length) { + return i::HashSequentialString(string, length) >> i::String::kHashShift; +} + + +uint16_t String::GetCharacter(int index) +{ + i::Handle str = Utils::OpenHandle(this); + return str->Get(index); +} + + +bool String::Equals(uint16_t *string, int length) { + i::Handle str = Utils::OpenHandle(this); + if (IsDeadCheck(str->GetIsolate(), "v8::String::Equals()")) return 0; + return str->SlowEqualsExternal(string, length); +} + + +bool String::Equals(char *string, int length) +{ + i::Handle str = Utils::OpenHandle(this); + if (IsDeadCheck(str->GetIsolate(), "v8::String::Equals()")) return 0; + return str->SlowEqualsExternal(string, length); +} + + int String::WriteUtf8(char* buffer, int capacity, int* nchars_ref, diff --git a/src/heap-inl.h b/src/heap-inl.h index aaf2927..4c55d63 100644 --- a/src/heap-inl.h +++ b/src/heap-inl.h @@ -105,6 +105,7 @@ MaybeObject* Heap::AllocateAsciiSymbol(Vector str, String* answer = String::cast(result); answer->set_length(str.length()); answer->set_hash_field(hash_field); + SeqString::cast(answer)->set_symbol_id(0); ASSERT_EQ(size, answer->Size()); @@ -138,6 +139,7 @@ MaybeObject* Heap::AllocateTwoByteSymbol(Vector str, String* answer = String::cast(result); answer->set_length(str.length()); answer->set_hash_field(hash_field); + SeqString::cast(answer)->set_symbol_id(0); ASSERT_EQ(size, answer->Size()); diff --git a/src/heap.cc b/src/heap.cc index bbb9d3e..d287ead 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -4009,6 +4009,7 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer, String* answer = String::cast(result); answer->set_length(chars); answer->set_hash_field(hash_field); + SeqString::cast(answer)->set_symbol_id(0); ASSERT_EQ(size, answer->Size()); @@ -4051,6 +4052,7 @@ MaybeObject* Heap::AllocateRawAsciiString(int length, PretenureFlag pretenure) { HeapObject::cast(result)->set_map(ascii_string_map()); String::cast(result)->set_length(length); String::cast(result)->set_hash_field(String::kEmptyHashField); + SeqString::cast(result)->set_symbol_id(0); ASSERT_EQ(size, HeapObject::cast(result)->Size()); return result; } @@ -4086,6 +4088,7 @@ MaybeObject* Heap::AllocateRawTwoByteString(int length, HeapObject::cast(result)->set_map(string_map()); String::cast(result)->set_length(length); String::cast(result)->set_hash_field(String::kEmptyHashField); + SeqString::cast(result)->set_symbol_id(0); ASSERT_EQ(size, HeapObject::cast(result)->Size()); return result; } diff --git a/src/objects-inl.h b/src/objects-inl.h index dc3aa46..34cae9f 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -2082,6 +2082,7 @@ SMI_ACCESSORS(FixedArrayBase, length, kLengthOffset) SMI_ACCESSORS(FreeSpace, size, kSizeOffset) SMI_ACCESSORS(String, length, kLengthOffset) +SMI_ACCESSORS(SeqString, symbol_id, kSymbolIdOffset) uint32_t String::hash_field() { diff --git a/src/objects.cc b/src/objects.cc index 9a87ac5..2946d02 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -6716,6 +6716,71 @@ static inline bool CompareStringContentsPartial(Isolate* isolate, } +bool String::SlowEqualsExternal(uc16 *string, int length) { + int len = this->length(); + if (len != length) return false; + if (len == 0) return true; + + // We know the strings are both non-empty. Compare the first chars + // before we try to flatten the strings. + if (this->Get(0) != string[0]) return false; + + String* lhs = this->TryFlattenGetString(); + + if (lhs->IsFlat()) { + String::FlatContent lhs_content = lhs->GetFlatContent(); + if (lhs->IsAsciiRepresentation()) { + Vector vec1 = lhs_content.ToAsciiVector(); + VectorIterator buf1(vec1); + VectorIterator ib(string, length); + return CompareStringContents(&buf1, &ib); + } else { + Vector vec1 = lhs_content.ToUC16Vector(); + Vector vec2(string, length); + return CompareRawStringContents(vec1, vec2); + } + } else { + Isolate* isolate = GetIsolate(); + isolate->objects_string_compare_buffer_a()->Reset(0, lhs); + VectorIterator ib(string, length); + return CompareStringContents(isolate->objects_string_compare_buffer_a(), &ib); + } +} + + +bool String::SlowEqualsExternal(char *string, int length) +{ + int len = this->length(); + if (len != length) return false; + if (len == 0) return true; + + // We know the strings are both non-empty. Compare the first chars + // before we try to flatten the strings. + if (this->Get(0) != string[0]) return false; + + String* lhs = this->TryFlattenGetString(); + + if (StringShape(lhs).IsSequentialAscii()) { + const char* str1 = SeqAsciiString::cast(lhs)->GetChars(); + return CompareRawStringContents(Vector(str1, len), + Vector(string, len)); + } + + if (lhs->IsFlat()) { + String::FlatContent lhs_content = lhs->GetFlatContent(); + Vector vec1 = lhs_content.ToUC16Vector(); + VectorIterator buf1(vec1); + VectorIterator buf2(string, length); + return CompareStringContents(&buf1, &buf2); + } else { + Isolate* isolate = GetIsolate(); + isolate->objects_string_compare_buffer_a()->Reset(0, lhs); + VectorIterator ib(string, length); + return CompareStringContents(isolate->objects_string_compare_buffer_a(), &ib); + } +} + + bool String::SlowEquals(String* other) { // Fast check: negative check with lengths. int len = length(); @@ -10716,9 +10781,24 @@ class AsciiSymbolKey : public SequentialSymbolKey { MaybeObject* AsObject() { if (hash_field_ == 0) Hash(); - return HEAP->AllocateAsciiSymbol(string_, hash_field_); + MaybeObject *result = HEAP->AllocateAsciiSymbol(string_, hash_field_); + if (!result->IsFailure() && result->ToObjectUnchecked()->IsSeqString()) { + while (true) { + Atomic32 my_symbol_id = next_symbol_id; + if (my_symbol_id > Smi::kMaxValue) + break; + if (my_symbol_id == NoBarrier_CompareAndSwap(&next_symbol_id, my_symbol_id, my_symbol_id + 1)) { + SeqString::cast(result->ToObjectUnchecked())->set_symbol_id(my_symbol_id); + break; + } + } + } + return result; } + + static Atomic32 next_symbol_id; }; +Atomic32 AsciiSymbolKey::next_symbol_id = 1; class SubStringAsciiSymbolKey : public HashTableKey { diff --git a/src/objects.h b/src/objects.h index f7d2180..d96e5f9 100644 --- a/src/objects.h +++ b/src/objects.h @@ -6201,6 +6201,9 @@ class String: public HeapObject { bool IsAsciiEqualTo(Vector str); bool IsTwoByteEqualTo(Vector str); + bool SlowEqualsExternal(uc16 *string, int length); + bool SlowEqualsExternal(char *string, int length); + // Return a UTF8 representation of the string. The string is null // terminated but may optionally contain nulls. Length is returned // in length_output if length_output is not a null pointer The string @@ -6457,8 +6460,13 @@ class SeqString: public String { // Casting. static inline SeqString* cast(Object* obj); + // Get and set the symbol id of the string + inline int symbol_id(); + inline void set_symbol_id(int value); + // Layout description. - static const int kHeaderSize = String::kSize; + static const int kSymbolIdOffset = String::kSize; + static const int kHeaderSize = kSymbolIdOffset + kPointerSize; private: DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString); -- 1.7.4.1