Diffstat (limited to 'chromium/v8/src/mips/codegen-mips.cc')
-rw-r--r-- | chromium/v8/src/mips/codegen-mips.cc | 705
1 file changed, 611 insertions(+), 94 deletions(-)
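The interesting parts of this change are the new hand-written MemCopyUint8 stub and the exp/sqrt stubs that fall back to the C++ standard library when no native stub can be built. As a reading aid, here is a minimal, self-contained sketch of that create-or-fall-back pattern; it is illustrative only, and `fast_math_enabled` and `CreateExpFunctionSketch` are stand-ins, not V8 identifiers.

    // Illustrative sketch (not V8 code) of the pattern used by
    // CreateExpFunction/CreateSqrtFunction below.
    #include <cmath>
    #include <cstdio>

    typedef double (*UnaryMathFunction)(double);

    static bool fast_math_enabled = false;  // stand-in for FLAG_fast_math

    static UnaryMathFunction CreateExpFunctionSketch() {
      if (!fast_math_enabled) return &std::exp;  // same fallback the patch uses
      // A real implementation would allocate executable memory, assemble a
      // MIPS stub with MacroAssembler, flush the instruction cache, protect
      // the buffer, and return it via FUNCTION_CAST, as the diff does.
      return &std::exp;
    }

    int main() {
      UnaryMathFunction f = CreateExpFunctionSketch();
      std::printf("exp(1.0) = %f\n", f(1.0));
      return 0;
    }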
diff --git a/chromium/v8/src/mips/codegen-mips.cc b/chromium/v8/src/mips/codegen-mips.cc index 3a87c5af886..5d613d0fb0a 100644 --- a/chromium/v8/src/mips/codegen-mips.cc +++ b/chromium/v8/src/mips/codegen-mips.cc @@ -1,54 +1,19 @@ // Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "v8.h" +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/v8.h" #if V8_TARGET_ARCH_MIPS -#include "codegen.h" -#include "macro-assembler.h" -#include "simulator-mips.h" +#include "src/codegen.h" +#include "src/macro-assembler.h" +#include "src/mips/simulator-mips.h" namespace v8 { namespace internal { -UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) { - switch (type) { - case TranscendentalCache::SIN: return &sin; - case TranscendentalCache::COS: return &cos; - case TranscendentalCache::TAN: return &tan; - case TranscendentalCache::LOG: return &log; - default: UNIMPLEMENTED(); - } - return NULL; -} - - #define __ masm. @@ -62,10 +27,10 @@ double fast_exp_simulator(double x) { UnaryMathFunction CreateExpFunction() { - if (!FLAG_fast_math) return &exp; + if (!FLAG_fast_math) return &std::exp; size_t actual_size; byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); - if (buffer == NULL) return &exp; + if (buffer == NULL) return &std::exp; ExternalReference::InitializeMathExpData(); MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); @@ -79,21 +44,13 @@ UnaryMathFunction CreateExpFunction() { Register temp2 = t1; Register temp3 = t2; - if (!IsMipsSoftFloatABI) { - // Input value is in f12 anyway, nothing to do. - } else { - __ Move(input, a0, a1); - } + __ MovFromFloatParameter(input); __ Push(temp3, temp2, temp1); MathExpGenerator::EmitMathExp( &masm, input, result, double_scratch1, double_scratch2, temp1, temp2, temp3); __ Pop(temp3, temp2, temp1); - if (!IsMipsSoftFloatABI) { - // Result is already in f0, nothing to do. 
- } else { - __ Move(v0, v1, result); - } + __ MovToFloatResult(result); __ Ret(); } @@ -113,13 +70,564 @@ UnaryMathFunction CreateExpFunction() { } -#undef __ +#if defined(V8_HOST_ARCH_MIPS) +MemCopyUint8Function CreateMemCopyUint8Function(MemCopyUint8Function stub) { +#if defined(USE_SIMULATOR) + return stub; +#else + size_t actual_size; + byte* buffer = static_cast<byte*>(OS::Allocate(3 * KB, &actual_size, true)); + if (buffer == NULL) return stub; + + // This code assumes that cache lines are 32 bytes and if the cache line is + // larger it will not work correctly. + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); + + { + Label lastb, unaligned, aligned, chkw, + loop16w, chk1w, wordCopy_loop, skip_pref, lastbloop, + leave, ua_chk16w, ua_loop16w, ua_skip_pref, ua_chkw, + ua_chk1w, ua_wordCopy_loop, ua_smallCopy, ua_smallCopy_loop; + + // The size of each prefetch. + uint32_t pref_chunk = 32; + // The maximum size of a prefetch, it must not be less then pref_chunk. + // If the real size of a prefetch is greater then max_pref_size and + // the kPrefHintPrepareForStore hint is used, the code will not work + // correctly. + uint32_t max_pref_size = 128; + ASSERT(pref_chunk < max_pref_size); + + // pref_limit is set based on the fact that we never use an offset + // greater then 5 on a store pref and that a single pref can + // never be larger then max_pref_size. + uint32_t pref_limit = (5 * pref_chunk) + max_pref_size; + int32_t pref_hint_load = kPrefHintLoadStreamed; + int32_t pref_hint_store = kPrefHintPrepareForStore; + uint32_t loadstore_chunk = 4; + + // The initial prefetches may fetch bytes that are before the buffer being + // copied. Start copies with an offset of 4 so avoid this situation when + // using kPrefHintPrepareForStore. + ASSERT(pref_hint_store != kPrefHintPrepareForStore || + pref_chunk * 4 >= max_pref_size); + + // If the size is less than 8, go to lastb. Regardless of size, + // copy dst pointer to v0 for the retuen value. + __ slti(t2, a2, 2 * loadstore_chunk); + __ bne(t2, zero_reg, &lastb); + __ mov(v0, a0); // In delay slot. + + // If src and dst have different alignments, go to unaligned, if they + // have the same alignment (but are not actually aligned) do a partial + // load/store to make them aligned. If they are both already aligned + // we can start copying at aligned. + __ xor_(t8, a1, a0); + __ andi(t8, t8, loadstore_chunk - 1); // t8 is a0/a1 word-displacement. + __ bne(t8, zero_reg, &unaligned); + __ subu(a3, zero_reg, a0); // In delay slot. + + __ andi(a3, a3, loadstore_chunk - 1); // Copy a3 bytes to align a0/a1. + __ beq(a3, zero_reg, &aligned); // Already aligned. + __ subu(a2, a2, a3); // In delay slot. a2 is the remining bytes count. + + if (kArchEndian == kLittle) { + __ lwr(t8, MemOperand(a1)); + __ addu(a1, a1, a3); + __ swr(t8, MemOperand(a0)); + __ addu(a0, a0, a3); + } else { + __ lwl(t8, MemOperand(a1)); + __ addu(a1, a1, a3); + __ swl(t8, MemOperand(a0)); + __ addu(a0, a0, a3); + } + // Now dst/src are both aligned to (word) aligned addresses. Set a2 to + // count how many bytes we have to copy after all the 64 byte chunks are + // copied and a3 to the dst pointer after all the 64 byte chunks have been + // copied. We will loop, incrementing a0 and a1 until a0 equals a3. + __ bind(&aligned); + __ andi(t8, a2, 0x3f); + __ beq(a2, t8, &chkw); // Less than 64? + __ subu(a3, a2, t8); // In delay slot. + __ addu(a3, a0, a3); // Now a3 is the final dst after loop. 
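At this point the generated code has aligned both pointers and is about to enter the 64-byte main loop. For readers who do not want to trace the MIPS assembly, the following plain C++ sketch shows the overall copy structure the stub implements (head alignment, 16-word chunks, word tail, byte tail); it is illustrative only and deliberately omits the prefetching and delay-slot scheduling.

    // Sketch of the copy structure only; the real stub is hand-scheduled MIPS.
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void MemCopyUint8Sketch(uint8_t* dst, const uint8_t* src, size_t count) {
      const size_t kWord = sizeof(uint32_t);  // loadstore_chunk
      // Head: copy bytes until dst is word-aligned (the stub does this with
      // lwr/swr or lwl/swl partial accesses, depending on endianness).
      while (count > 0 && (reinterpret_cast<uintptr_t>(dst) & (kWord - 1))) {
        *dst++ = *src++;
        --count;
      }
      // Main loop: 64-byte (16-word) chunks, mirroring loop16w.
      while (count >= 16 * kWord) {
        std::memcpy(dst, src, 16 * kWord);
        dst += 16 * kWord;
        src += 16 * kWord;
        count -= 16 * kWord;
      }
      // Word tail, mirroring chk1w/wordCopy_loop.
      while (count >= kWord) {
        std::memcpy(dst, src, kWord);
        dst += kWord;
        src += kWord;
        count -= kWord;
      }
      // Byte tail, mirroring lastb/lastbloop.
      while (count-- > 0) *dst++ = *src++;
    }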
+ + // When in the loop we prefetch with kPrefHintPrepareForStore hint, + // in this case the a0+x should be past the "t0-32" address. This means: + // for x=128 the last "safe" a0 address is "t0-160". Alternatively, for + // x=64 the last "safe" a0 address is "t0-96". In the current version we + // will use "pref hint, 128(a0)", so "t0-160" is the limit. + if (pref_hint_store == kPrefHintPrepareForStore) { + __ addu(t0, a0, a2); // t0 is the "past the end" address. + __ Subu(t9, t0, pref_limit); // t9 is the "last safe pref" address. + } + + __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk)); + + if (pref_hint_store != kPrefHintPrepareForStore) { + __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk)); + } + __ bind(&loop16w); + __ lw(t0, MemOperand(a1)); + + if (pref_hint_store == kPrefHintPrepareForStore) { + __ sltu(v1, t9, a0); // If a0 > t9, don't use next prefetch. + __ Branch(USE_DELAY_SLOT, &skip_pref, gt, v1, Operand(zero_reg)); + } + __ lw(t1, MemOperand(a1, 1, loadstore_chunk)); // Maybe in delay slot. + + __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk)); + + __ bind(&skip_pref); + __ lw(t2, MemOperand(a1, 2, loadstore_chunk)); + __ lw(t3, MemOperand(a1, 3, loadstore_chunk)); + __ lw(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lw(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lw(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lw(t7, MemOperand(a1, 7, loadstore_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk)); + + __ sw(t0, MemOperand(a0)); + __ sw(t1, MemOperand(a0, 1, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 2, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 3, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 4, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 5, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 6, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 7, loadstore_chunk)); + + __ lw(t0, MemOperand(a1, 8, loadstore_chunk)); + __ lw(t1, MemOperand(a1, 9, loadstore_chunk)); + __ lw(t2, MemOperand(a1, 10, loadstore_chunk)); + __ lw(t3, MemOperand(a1, 11, loadstore_chunk)); + __ lw(t4, MemOperand(a1, 12, loadstore_chunk)); + __ lw(t5, MemOperand(a1, 13, loadstore_chunk)); + __ lw(t6, MemOperand(a1, 14, loadstore_chunk)); + __ lw(t7, MemOperand(a1, 15, loadstore_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk)); + + __ sw(t0, MemOperand(a0, 8, loadstore_chunk)); + __ sw(t1, MemOperand(a0, 9, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 10, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 11, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 12, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 13, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 14, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 15, loadstore_chunk)); + __ addiu(a0, a0, 16 * loadstore_chunk); + __ bne(a0, a3, &loop16w); + __ addiu(a1, a1, 16 * loadstore_chunk); // In delay slot. + __ mov(a2, t8); + + // Here we have src and dest word-aligned but less than 64-bytes to go. + // Check for a 32 bytes chunk and copy if there is one. Otherwise jump + // down to chk1w to handle the tail end of the copy. 
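The loop above interleaves `pref` hints a few cache-line-sized chunks ahead of both pointers, and when kPrefHintPrepareForStore is in use it must stop prefetching the destination before the "last safe pref" address kept in t9. A rough portable analogue, assuming a GCC/Clang toolchain for `__builtin_prefetch`, might look like the sketch below; the names and constants are illustrative, not taken from the stub.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void CopyWithPrefetchSketch(uint8_t* dst, const uint8_t* src, size_t count) {
      const size_t kChunk = 64;       // bytes per iteration, like 16 * loadstore_chunk
      const size_t kPrefAhead = 128;  // prefetch distance, like 4 * pref_chunk
      uint8_t* const dst_end = dst + count;
      while (static_cast<size_t>(dst_end - dst) >= kChunk) {
        // Stop issuing hints once the prefetch target would pass the end of
        // the buffer, the analogue of the t9 "last safe pref" limit above.
        if (dst + kPrefAhead + kChunk <= dst_end) {
          __builtin_prefetch(src + kPrefAhead, 0, 1);  // read hint
          __builtin_prefetch(dst + kPrefAhead, 1, 1);  // prepare-for-store hint
        }
        std::memcpy(dst, src, kChunk);
        dst += kChunk;
        src += kChunk;
      }
      while (dst < dst_end) *dst++ = *src++;
    }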
+ __ bind(&chkw); + __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk)); + __ andi(t8, a2, 0x1f); + __ beq(a2, t8, &chk1w); // Less than 32? + __ nop(); // In delay slot. + __ lw(t0, MemOperand(a1)); + __ lw(t1, MemOperand(a1, 1, loadstore_chunk)); + __ lw(t2, MemOperand(a1, 2, loadstore_chunk)); + __ lw(t3, MemOperand(a1, 3, loadstore_chunk)); + __ lw(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lw(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lw(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lw(t7, MemOperand(a1, 7, loadstore_chunk)); + __ addiu(a1, a1, 8 * loadstore_chunk); + __ sw(t0, MemOperand(a0)); + __ sw(t1, MemOperand(a0, 1, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 2, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 3, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 4, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 5, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 6, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 7, loadstore_chunk)); + __ addiu(a0, a0, 8 * loadstore_chunk); + + // Here we have less than 32 bytes to copy. Set up for a loop to copy + // one word at a time. Set a2 to count how many bytes we have to copy + // after all the word chunks are copied and a3 to the dst pointer after + // all the word chunks have been copied. We will loop, incrementing a0 + // and a1 untill a0 equals a3. + __ bind(&chk1w); + __ andi(a2, t8, loadstore_chunk - 1); + __ beq(a2, t8, &lastb); + __ subu(a3, t8, a2); // In delay slot. + __ addu(a3, a0, a3); + + __ bind(&wordCopy_loop); + __ lw(t3, MemOperand(a1)); + __ addiu(a0, a0, loadstore_chunk); + __ addiu(a1, a1, loadstore_chunk); + __ bne(a0, a3, &wordCopy_loop); + __ sw(t3, MemOperand(a0, -1, loadstore_chunk)); // In delay slot. + + __ bind(&lastb); + __ Branch(&leave, le, a2, Operand(zero_reg)); + __ addu(a3, a0, a2); + + __ bind(&lastbloop); + __ lb(v1, MemOperand(a1)); + __ addiu(a0, a0, 1); + __ addiu(a1, a1, 1); + __ bne(a0, a3, &lastbloop); + __ sb(v1, MemOperand(a0, -1)); // In delay slot. + + __ bind(&leave); + __ jr(ra); + __ nop(); + + // Unaligned case. Only the dst gets aligned so we need to do partial + // loads of the source followed by normal stores to the dst (once we + // have aligned the destination). + __ bind(&unaligned); + __ andi(a3, a3, loadstore_chunk - 1); // Copy a3 bytes to align a0/a1. + __ beq(a3, zero_reg, &ua_chk16w); + __ subu(a2, a2, a3); // In delay slot. + + if (kArchEndian == kLittle) { + __ lwr(v1, MemOperand(a1)); + __ lwl(v1, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ addu(a1, a1, a3); + __ swr(v1, MemOperand(a0)); + __ addu(a0, a0, a3); + } else { + __ lwl(v1, MemOperand(a1)); + __ lwr(v1, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ addu(a1, a1, a3); + __ swl(v1, MemOperand(a0)); + __ addu(a0, a0, a3); + } + + // Now the dst (but not the source) is aligned. Set a2 to count how many + // bytes we have to copy after all the 64 byte chunks are copied and a3 to + // the dst pointer after all the 64 byte chunks have been copied. We will + // loop, incrementing a0 and a1 until a0 equals a3. + __ bind(&ua_chk16w); + __ andi(t8, a2, 0x3f); + __ beq(a2, t8, &ua_chkw); + __ subu(a3, a2, t8); // In delay slot. 
+ __ addu(a3, a0, a3); + + if (pref_hint_store == kPrefHintPrepareForStore) { + __ addu(t0, a0, a2); + __ Subu(t9, t0, pref_limit); + } + + __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk)); + __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk)); + + if (pref_hint_store != kPrefHintPrepareForStore) { + __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk)); + } + + __ bind(&ua_loop16w); + __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk)); + if (kArchEndian == kLittle) { + __ lwr(t0, MemOperand(a1)); + __ lwr(t1, MemOperand(a1, 1, loadstore_chunk)); + __ lwr(t2, MemOperand(a1, 2, loadstore_chunk)); + + if (pref_hint_store == kPrefHintPrepareForStore) { + __ sltu(v1, t9, a0); + __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg)); + } + __ lwr(t3, MemOperand(a1, 3, loadstore_chunk)); // Maybe in delay slot. + + __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk)); + + __ bind(&ua_skip_pref); + __ lwr(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lwr(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lwr(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lwr(t7, MemOperand(a1, 7, loadstore_chunk)); + __ lwl(t0, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t1, + MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t2, + MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t3, + MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t4, + MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t5, + MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t6, + MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t7, + MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one)); + } else { + __ lwl(t0, MemOperand(a1)); + __ lwl(t1, MemOperand(a1, 1, loadstore_chunk)); + __ lwl(t2, MemOperand(a1, 2, loadstore_chunk)); + + if (pref_hint_store == kPrefHintPrepareForStore) { + __ sltu(v1, t9, a0); + __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg)); + } + __ lwl(t3, MemOperand(a1, 3, loadstore_chunk)); // Maybe in delay slot. 
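Each source word in this unaligned path is assembled from a pair of partial loads: lwr plus lwl on little-endian targets, and the reverse on big-endian ones. A portable way to express the same unaligned read, shown here only for comparison, is a small memcpy, which MIPS compilers typically lower to the same lwl/lwr pair when unaligned accesses are not available.

    #include <cstdint>
    #include <cstring>

    // Illustrative only: read one word from a possibly unaligned address
    // without undefined behavior; the stub does this with lwr/lwl directly.
    static inline uint32_t LoadUnalignedWord(const uint8_t* p) {
      uint32_t word;
      std::memcpy(&word, p, sizeof(word));
      return word;
    }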
+ + __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk)); + __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk)); + + __ bind(&ua_skip_pref); + __ lwl(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lwl(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lwl(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lwl(t7, MemOperand(a1, 7, loadstore_chunk)); + __ lwr(t0, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t1, + MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t2, + MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t3, + MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t4, + MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t5, + MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t6, + MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t7, + MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one)); + } + __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk)); + __ sw(t0, MemOperand(a0)); + __ sw(t1, MemOperand(a0, 1, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 2, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 3, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 4, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 5, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 6, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 7, loadstore_chunk)); + if (kArchEndian == kLittle) { + __ lwr(t0, MemOperand(a1, 8, loadstore_chunk)); + __ lwr(t1, MemOperand(a1, 9, loadstore_chunk)); + __ lwr(t2, MemOperand(a1, 10, loadstore_chunk)); + __ lwr(t3, MemOperand(a1, 11, loadstore_chunk)); + __ lwr(t4, MemOperand(a1, 12, loadstore_chunk)); + __ lwr(t5, MemOperand(a1, 13, loadstore_chunk)); + __ lwr(t6, MemOperand(a1, 14, loadstore_chunk)); + __ lwr(t7, MemOperand(a1, 15, loadstore_chunk)); + __ lwl(t0, + MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t1, + MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t2, + MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t3, + MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t4, + MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t5, + MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t6, + MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t7, + MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one)); + } else { + __ lwl(t0, MemOperand(a1, 8, loadstore_chunk)); + __ lwl(t1, MemOperand(a1, 9, loadstore_chunk)); + __ lwl(t2, MemOperand(a1, 10, loadstore_chunk)); + __ lwl(t3, MemOperand(a1, 11, loadstore_chunk)); + __ lwl(t4, MemOperand(a1, 12, loadstore_chunk)); + __ lwl(t5, MemOperand(a1, 13, loadstore_chunk)); + __ lwl(t6, MemOperand(a1, 14, loadstore_chunk)); + __ lwl(t7, MemOperand(a1, 15, loadstore_chunk)); + __ lwr(t0, + MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t1, + MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t2, + MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t3, + MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t4, + MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t5, + MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t6, + MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t7, + 
MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one)); + } + __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk)); + __ sw(t0, MemOperand(a0, 8, loadstore_chunk)); + __ sw(t1, MemOperand(a0, 9, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 10, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 11, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 12, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 13, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 14, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 15, loadstore_chunk)); + __ addiu(a0, a0, 16 * loadstore_chunk); + __ bne(a0, a3, &ua_loop16w); + __ addiu(a1, a1, 16 * loadstore_chunk); // In delay slot. + __ mov(a2, t8); + + // Here less than 64-bytes. Check for + // a 32 byte chunk and copy if there is one. Otherwise jump down to + // ua_chk1w to handle the tail end of the copy. + __ bind(&ua_chkw); + __ Pref(pref_hint_load, MemOperand(a1)); + __ andi(t8, a2, 0x1f); + + __ beq(a2, t8, &ua_chk1w); + __ nop(); // In delay slot. + if (kArchEndian == kLittle) { + __ lwr(t0, MemOperand(a1)); + __ lwr(t1, MemOperand(a1, 1, loadstore_chunk)); + __ lwr(t2, MemOperand(a1, 2, loadstore_chunk)); + __ lwr(t3, MemOperand(a1, 3, loadstore_chunk)); + __ lwr(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lwr(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lwr(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lwr(t7, MemOperand(a1, 7, loadstore_chunk)); + __ lwl(t0, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t1, + MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t2, + MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t3, + MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t4, + MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t5, + MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t6, + MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwl(t7, + MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one)); + } else { + __ lwl(t0, MemOperand(a1)); + __ lwl(t1, MemOperand(a1, 1, loadstore_chunk)); + __ lwl(t2, MemOperand(a1, 2, loadstore_chunk)); + __ lwl(t3, MemOperand(a1, 3, loadstore_chunk)); + __ lwl(t4, MemOperand(a1, 4, loadstore_chunk)); + __ lwl(t5, MemOperand(a1, 5, loadstore_chunk)); + __ lwl(t6, MemOperand(a1, 6, loadstore_chunk)); + __ lwl(t7, MemOperand(a1, 7, loadstore_chunk)); + __ lwr(t0, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t1, + MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t2, + MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t3, + MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t4, + MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t5, + MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t6, + MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one)); + __ lwr(t7, + MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one)); + } + __ addiu(a1, a1, 8 * loadstore_chunk); + __ sw(t0, MemOperand(a0)); + __ sw(t1, MemOperand(a0, 1, loadstore_chunk)); + __ sw(t2, MemOperand(a0, 2, loadstore_chunk)); + __ sw(t3, MemOperand(a0, 3, loadstore_chunk)); + __ sw(t4, MemOperand(a0, 4, loadstore_chunk)); + __ sw(t5, MemOperand(a0, 5, loadstore_chunk)); + __ sw(t6, MemOperand(a0, 6, loadstore_chunk)); + __ sw(t7, MemOperand(a0, 7, loadstore_chunk)); + __ addiu(a0, a0, 8 * 
loadstore_chunk); + + // Less than 32 bytes to copy. Set up for a loop to + // copy one word at a time. + __ bind(&ua_chk1w); + __ andi(a2, t8, loadstore_chunk - 1); + __ beq(a2, t8, &ua_smallCopy); + __ subu(a3, t8, a2); // In delay slot. + __ addu(a3, a0, a3); + + __ bind(&ua_wordCopy_loop); + if (kArchEndian == kLittle) { + __ lwr(v1, MemOperand(a1)); + __ lwl(v1, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + } else { + __ lwl(v1, MemOperand(a1)); + __ lwr(v1, + MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one)); + } + __ addiu(a0, a0, loadstore_chunk); + __ addiu(a1, a1, loadstore_chunk); + __ bne(a0, a3, &ua_wordCopy_loop); + __ sw(v1, MemOperand(a0, -1, loadstore_chunk)); // In delay slot. + + // Copy the last 8 bytes. + __ bind(&ua_smallCopy); + __ beq(a2, zero_reg, &leave); + __ addu(a3, a0, a2); // In delay slot. + + __ bind(&ua_smallCopy_loop); + __ lb(v1, MemOperand(a1)); + __ addiu(a0, a0, 1); + __ addiu(a1, a1, 1); + __ bne(a0, a3, &ua_smallCopy_loop); + __ sb(v1, MemOperand(a0, -1)); // In delay slot. + + __ jr(ra); + __ nop(); + } + CodeDesc desc; + masm.GetCode(&desc); + ASSERT(!RelocInfo::RequiresRelocation(desc)); + CPU::FlushICache(buffer, actual_size); + OS::ProtectCode(buffer, actual_size); + return FUNCTION_CAST<MemCopyUint8Function>(buffer); +#endif +} +#endif UnaryMathFunction CreateSqrtFunction() { - return &sqrt; +#if defined(USE_SIMULATOR) + return &std::sqrt; +#else + size_t actual_size; + byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); + if (buffer == NULL) return &std::sqrt; + + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); + + __ MovFromFloatParameter(f12); + __ sqrt_d(f0, f12); + __ MovToFloatResult(f0); + __ Ret(); + + CodeDesc desc; + masm.GetCode(&desc); + ASSERT(!RelocInfo::RequiresRelocation(desc)); + + CPU::FlushICache(buffer, actual_size); + OS::ProtectCode(buffer, actual_size); + return FUNCTION_CAST<UnaryMathFunction>(buffer); +#endif } +#undef __ + // ------------------------------------------------------------------------- // Platform-specific RuntimeCallHelper functions. @@ -290,8 +798,8 @@ void ElementsTransitionGenerator::GenerateSmiToDouble( __ LoadRoot(at, Heap::kTheHoleValueRootIndex); __ Assert(eq, kObjectFoundInSmiOnlyArray, at, Operand(t5)); } - __ sw(t0, MemOperand(t3)); // mantissa - __ sw(t1, MemOperand(t3, kIntSize)); // exponent + __ sw(t0, MemOperand(t3, Register::kMantissaOffset)); // mantissa + __ sw(t1, MemOperand(t3, Register::kExponentOffset)); // exponent __ Addu(t3, t3, kDoubleSize); __ bind(&entry); @@ -341,7 +849,9 @@ void ElementsTransitionGenerator::GenerateDoubleToObject( __ sw(t5, MemOperand(t2, HeapObject::kMapOffset)); // Prepare for conversion loop. - __ Addu(t0, t0, Operand(FixedDoubleArray::kHeaderSize - kHeapObjectTag + 4)); + __ Addu(t0, t0, Operand( + FixedDoubleArray::kHeaderSize - kHeapObjectTag + + Register::kExponentOffset)); __ Addu(a3, t2, Operand(FixedArray::kHeaderSize)); __ Addu(t2, t2, Operand(kHeapObjectTag)); __ sll(t1, t1, 1); @@ -350,7 +860,8 @@ void ElementsTransitionGenerator::GenerateDoubleToObject( __ LoadRoot(t5, Heap::kHeapNumberMapRootIndex); // Using offsetted addresses. 
// a3: begin of destination FixedArray element fields, not tagged - // t0: begin of source FixedDoubleArray element fields, not tagged, +4 + // t0: begin of source FixedDoubleArray element fields, not tagged, + // points to the exponent // t1: end of destination FixedArray, not tagged // t2: destination FixedArray // t3: the-hole pointer @@ -373,7 +884,9 @@ void ElementsTransitionGenerator::GenerateDoubleToObject( // Non-hole double, copy value into a heap number. __ AllocateHeapNumber(a2, a0, t6, t5, &gc_required); // a2: new heap number - __ lw(a0, MemOperand(t0, -12)); + // Load mantissa of current element, t0 point to exponent of next element. + __ lw(a0, MemOperand(t0, (Register::kMantissaOffset + - Register::kExponentOffset - kDoubleSize))); __ sw(a0, FieldMemOperand(a2, HeapNumber::kMantissaOffset)); __ sw(a1, FieldMemOperand(a2, HeapNumber::kExponentOffset)); __ mov(a0, a3); @@ -492,7 +1005,7 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, at, Operand(zero_reg)); } // Rule out short external strings. - STATIC_CHECK(kShortExternalStringTag != 0); + STATIC_ASSERT(kShortExternalStringTag != 0); __ And(at, result, Operand(kShortExternalStringMask)); __ Branch(call_runtime, ne, at, Operand(zero_reg)); __ lw(string, FieldMemOperand(string, ExternalString::kResourceDataOffset)); @@ -578,8 +1091,8 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm, __ li(temp3, Operand(ExternalReference::math_exp_log_table())); __ sll(at, temp2, 3); __ Addu(temp3, temp3, Operand(at)); - __ lw(temp2, MemOperand(temp3, 0)); - __ lw(temp3, MemOperand(temp3, kPointerSize)); + __ lw(temp2, MemOperand(temp3, Register::kMantissaOffset)); + __ lw(temp3, MemOperand(temp3, Register::kExponentOffset)); // The first word is loaded is the lower number register. if (temp2.code() < temp3.code()) { __ sll(at, temp1, 20); @@ -591,11 +1104,11 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm, __ Move(double_scratch1, temp3, temp1); } __ mul_d(result, result, double_scratch1); - __ Branch(&done); + __ BranchShort(&done); __ bind(&zero); __ Move(result, kDoubleRegZero); - __ Branch(&done); + __ BranchShort(&done); __ bind(&infinity); __ ldc1(result, ExpConstant(2, temp3)); @@ -603,42 +1116,47 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm, __ bind(&done); } - +#ifdef DEBUG // nop(CODE_AGE_MARKER_NOP) static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180; +#endif -static byte* GetNoCodeAgeSequence(uint32_t* length) { - // The sequence of instructions that is patched out for aging code is the - // following boilerplate stack-building prologue that is found in FUNCTIONS - static bool initialized = false; - static uint32_t sequence[kNoCodeAgeSequenceLength]; - byte* byte_sequence = reinterpret_cast<byte*>(sequence); - *length = kNoCodeAgeSequenceLength * Assembler::kInstrSize; - if (!initialized) { - CodePatcher patcher(byte_sequence, kNoCodeAgeSequenceLength); - patcher.masm()->Push(ra, fp, cp, a1); - patcher.masm()->nop(Assembler::CODE_AGE_SEQUENCE_NOP); - patcher.masm()->Addu(fp, sp, - Operand(StandardFrameConstants::kFixedFrameSizeFromFp)); - initialized = true; - } - return byte_sequence; + +CodeAgingHelper::CodeAgingHelper() { + ASSERT(young_sequence_.length() == kNoCodeAgeSequenceLength); + // Since patcher is a large object, allocate it dynamically when needed, + // to avoid overloading the stack in stress conditions. + // DONT_FLUSH is used because the CodeAgingHelper is initialized early in + // the process, before MIPS simulator ICache is setup. 
+ SmartPointer<CodePatcher> patcher( + new CodePatcher(young_sequence_.start(), + young_sequence_.length() / Assembler::kInstrSize, + CodePatcher::DONT_FLUSH)); + PredictableCodeSizeScope scope(patcher->masm(), young_sequence_.length()); + patcher->masm()->Push(ra, fp, cp, a1); + patcher->masm()->nop(Assembler::CODE_AGE_SEQUENCE_NOP); + patcher->masm()->Addu( + fp, sp, Operand(StandardFrameConstants::kFixedFrameSizeFromFp)); } -bool Code::IsYoungSequence(byte* sequence) { - uint32_t young_length; - byte* young_sequence = GetNoCodeAgeSequence(&young_length); - bool result = !memcmp(sequence, young_sequence, young_length); - ASSERT(result || - Memory::uint32_at(sequence) == kCodeAgePatchFirstInstruction); +#ifdef DEBUG +bool CodeAgingHelper::IsOld(byte* candidate) const { + return Memory::uint32_at(candidate) == kCodeAgePatchFirstInstruction; +} +#endif + + +bool Code::IsYoungSequence(Isolate* isolate, byte* sequence) { + bool result = isolate->code_aging_helper()->IsYoung(sequence); + ASSERT(result || isolate->code_aging_helper()->IsOld(sequence)); return result; } -void Code::GetCodeAgeAndParity(byte* sequence, Age* age, +void Code::GetCodeAgeAndParity(Isolate* isolate, byte* sequence, Age* age, MarkingParity* parity) { - if (IsYoungSequence(sequence)) { + if (IsYoungSequence(isolate, sequence)) { *age = kNoAgeCodeAge; *parity = NO_MARKING_PARITY; } else { @@ -654,10 +1172,9 @@ void Code::PatchPlatformCodeAge(Isolate* isolate, byte* sequence, Code::Age age, MarkingParity parity) { - uint32_t young_length; - byte* young_sequence = GetNoCodeAgeSequence(&young_length); + uint32_t young_length = isolate->code_aging_helper()->young_sequence_length(); if (age == kNoAgeCodeAge) { - CopyBytes(sequence, young_sequence, young_length); + isolate->code_aging_helper()->CopyYoungSequenceTo(sequence); CPU::FlushICache(sequence, young_length); } else { Code* stub = GetCodeAgeStub(isolate, age, parity); |
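The net effect of the code-aging changes is that the young prologue sequence now lives in a per-isolate CodeAgingHelper: young code is recognized by a byte-for-byte match against the assembled Push/nop/Addu prologue, and old code by its first word being the kCodeAgePatchFirstInstruction marker. A standalone sketch of that check, with a placeholder byte sequence rather than the real assembled prologue, could look like this:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Constant copied from the diff; everything else here is illustrative.
    static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180;

    struct CodeAgingHelperSketch {
      const uint8_t* young_sequence;  // assembled Push/nop/Addu prologue
      size_t young_length;            // kNoCodeAgeSequenceLength * kInstrSize

      bool IsYoung(const uint8_t* candidate) const {
        return std::memcmp(candidate, young_sequence, young_length) == 0;
      }
      bool IsOld(const uint8_t* candidate) const {
        uint32_t first;
        std::memcpy(&first, candidate, sizeof(first));
        return first == kCodeAgePatchFirstInstruction;
      }
    };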