diff options
Diffstat (limited to 'src/3rdparty')
69 files changed, 6245 insertions, 9273 deletions
diff --git a/src/3rdparty/masm/assembler/ARM64Assembler.h b/src/3rdparty/masm/assembler/ARM64Assembler.h index 1787e921e8..fcf2e485e8 100644 --- a/src/3rdparty/masm/assembler/ARM64Assembler.h +++ b/src/3rdparty/masm/assembler/ARM64Assembler.h @@ -1980,6 +1980,13 @@ public: } template<int datasize> + ALWAYS_INLINE void stp(RegisterID rt, RegisterID rt2, RegisterID rn, unsigned pimm = 0) + { + CHECK_DATASIZE(); + insn(loadStoreRegisterPairOffset(MEMPAIROPSIZE_INT(datasize), false, MemOp_STORE, pimm, rn, rt, rt2)); + } + + template<int datasize> ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, RegisterID rm) { str<datasize>(rt, rn, rm, UXTX, 0); @@ -3701,6 +3708,23 @@ private: } // 'V' means vector + ALWAYS_INLINE static int loadStoreRegisterPairOffset(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, FPRegisterID rt, FPRegisterID rt2) + { + ASSERT(size < 3); + ASSERT(opc == (opc & 1)); // Only load or store, load signed 64 is handled via size. + ASSERT(V || (size != MemPairOp_LoadSigned_32) || (opc == MemOp_LOAD)); // There isn't an integer store signed. 
+ unsigned immedShiftAmount = memPairOffsetShift(V, size); + int imm7 = immediate >> immedShiftAmount; + ASSERT((imm7 << immedShiftAmount) == immediate && isInt7(imm7)); + return (0x29000000 | size << 30 | V << 26 | opc << 22 | (imm7 & 0x7f) << 15 | rt2 << 10 | xOrSp(rn) << 5 | rt); + } + + ALWAYS_INLINE static int loadStoreRegisterPairOffset(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, RegisterID rt, RegisterID rt2) + { + return loadStoreRegisterPairOffset(size, V, opc, immediate, rn, xOrZrAsFPR(rt), xOrZrAsFPR(rt2)); + } + + // 'V' means vector // 'S' means shift rm ALWAYS_INLINE static int loadStoreRegisterRegisterOffset(MemOpSize size, bool V, MemOp opc, RegisterID rm, ExtendType option, bool S, RegisterID rn, FPRegisterID rt) { diff --git a/src/3rdparty/masm/assembler/ARMAssembler.cpp b/src/3rdparty/masm/assembler/ARMAssembler.cpp deleted file mode 100644 index 6912d1ea39..0000000000 --- a/src/3rdparty/masm/assembler/ARMAssembler.cpp +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Copyright (C) 2009 University of Szeged - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL UNIVERSITY OF SZEGED OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#if ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#include "ARMAssembler.h" - -namespace JSC { - -// Patching helpers - -void ARMAssembler::patchConstantPoolLoad(void* loadAddr, void* constPoolAddr) -{ - ARMWord *ldr = reinterpret_cast<ARMWord*>(loadAddr); - ARMWord diff = reinterpret_cast<ARMWord*>(constPoolAddr) - ldr; - ARMWord index = (*ldr & 0xfff) >> 1; - - ASSERT(diff >= 1); - if (diff >= 2 || index > 0) { - diff = (diff + index - 2) * sizeof(ARMWord); - ASSERT(diff <= 0xfff); - *ldr = (*ldr & ~0xfff) | diff; - } else - *ldr = (*ldr & ~(0xfff | ARMAssembler::DataTransferUp)) | sizeof(ARMWord); -} - -// Handle immediates - -ARMWord ARMAssembler::getOp2(ARMWord imm) -{ - int rol; - - if (imm <= 0xff) - return Op2Immediate | imm; - - if ((imm & 0xff000000) == 0) { - imm <<= 8; - rol = 8; - } - else { - imm = (imm << 24) | (imm >> 8); - rol = 0; - } - - if ((imm & 0xff000000) == 0) { - imm <<= 8; - rol += 4; - } - - if ((imm & 0xf0000000) == 0) { - imm <<= 4; - rol += 2; - } - - if ((imm & 0xc0000000) == 0) { - imm <<= 2; - rol += 1; - } - - if ((imm & 0x00ffffff) == 0) - return Op2Immediate | (imm >> 24) | (rol << 8); - - return InvalidImmediate; -} - -int ARMAssembler::genInt(int reg, ARMWord imm, bool positive) -{ - // Step1: Search a non-immediate part - ARMWord mask; - ARMWord imm1; - ARMWord imm2; - int rol; - - mask = 0xff000000; - rol = 8; - while(1) { - if ((imm & mask) == 0) { - imm = (imm 
<< rol) | (imm >> (32 - rol)); - rol = 4 + (rol >> 1); - break; - } - rol += 2; - mask >>= 2; - if (mask & 0x3) { - // rol 8 - imm = (imm << 8) | (imm >> 24); - mask = 0xff00; - rol = 24; - while (1) { - if ((imm & mask) == 0) { - imm = (imm << rol) | (imm >> (32 - rol)); - rol = (rol >> 1) - 8; - break; - } - rol += 2; - mask >>= 2; - if (mask & 0x3) - return 0; - } - break; - } - } - - ASSERT((imm & 0xff) == 0); - - if ((imm & 0xff000000) == 0) { - imm1 = Op2Immediate | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); - imm2 = Op2Immediate | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); - } else if (imm & 0xc0000000) { - imm1 = Op2Immediate | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); - imm <<= 8; - rol += 4; - - if ((imm & 0xff000000) == 0) { - imm <<= 8; - rol += 4; - } - - if ((imm & 0xf0000000) == 0) { - imm <<= 4; - rol += 2; - } - - if ((imm & 0xc0000000) == 0) { - imm <<= 2; - rol += 1; - } - - if ((imm & 0x00ffffff) == 0) - imm2 = Op2Immediate | (imm >> 24) | ((rol & 0xf) << 8); - else - return 0; - } else { - if ((imm & 0xf0000000) == 0) { - imm <<= 4; - rol += 2; - } - - if ((imm & 0xc0000000) == 0) { - imm <<= 2; - rol += 1; - } - - imm1 = Op2Immediate | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); - imm <<= 8; - rol += 4; - - if ((imm & 0xf0000000) == 0) { - imm <<= 4; - rol += 2; - } - - if ((imm & 0xc0000000) == 0) { - imm <<= 2; - rol += 1; - } - - if ((imm & 0x00ffffff) == 0) - imm2 = Op2Immediate | (imm >> 24) | ((rol & 0xf) << 8); - else - return 0; - } - - if (positive) { - mov(reg, imm1); - orr(reg, reg, imm2); - } else { - mvn(reg, imm1); - bic(reg, reg, imm2); - } - - return 1; -} - -ARMWord ARMAssembler::getImm(ARMWord imm, int tmpReg, bool invert) -{ - ARMWord tmp; - - // Do it by 1 instruction - tmp = getOp2(imm); - if (tmp != InvalidImmediate) - return tmp; - - tmp = getOp2(~imm); - if (tmp != InvalidImmediate) { - if (invert) - return tmp | Op2InvertedImmediate; - mvn(tmpReg, tmp); - return tmpReg; - } - - return encodeComplexImm(imm, 
tmpReg); -} - -void ARMAssembler::moveImm(ARMWord imm, int dest) -{ - ARMWord tmp; - - // Do it by 1 instruction - tmp = getOp2(imm); - if (tmp != InvalidImmediate) { - mov(dest, tmp); - return; - } - - tmp = getOp2(~imm); - if (tmp != InvalidImmediate) { - mvn(dest, tmp); - return; - } - - encodeComplexImm(imm, dest); -} - -ARMWord ARMAssembler::encodeComplexImm(ARMWord imm, int dest) -{ -#if WTF_ARM_ARCH_AT_LEAST(7) - ARMWord tmp = getImm16Op2(imm); - if (tmp != InvalidImmediate) { - movw(dest, tmp); - return dest; - } - movw(dest, getImm16Op2(imm & 0xffff)); - movt(dest, getImm16Op2(imm >> 16)); - return dest; -#else - // Do it by 2 instruction - if (genInt(dest, imm, true)) - return dest; - if (genInt(dest, ~imm, false)) - return dest; - - ldrImmediate(dest, imm); - return dest; -#endif -} - -// Memory load/store helpers - -void ARMAssembler::dataTransfer32(DataTransferTypeA transferType, RegisterID srcDst, RegisterID base, int32_t offset) -{ - if (offset >= 0) { - if (offset <= 0xfff) - dtrUp(transferType, srcDst, base, offset); - else if (offset <= 0xfffff) { - add(ARMRegisters::S0, base, Op2Immediate | (offset >> 12) | (10 << 8)); - dtrUp(transferType, srcDst, ARMRegisters::S0, (offset & 0xfff)); - } else { - moveImm(offset, ARMRegisters::S0); - dtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); - } - } else { - if (offset >= -0xfff) - dtrDown(transferType, srcDst, base, -offset); - else if (offset >= -0xfffff) { - sub(ARMRegisters::S0, base, Op2Immediate | (-offset >> 12) | (10 << 8)); - dtrDown(transferType, srcDst, ARMRegisters::S0, (-offset & 0xfff)); - } else { - moveImm(offset, ARMRegisters::S0); - dtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); - } - } -} - -void ARMAssembler::baseIndexTransfer32(DataTransferTypeA transferType, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset) -{ - ASSERT(scale >= 0 && scale <= 3); - ARMWord op2 = lsl(index, scale); - - if (!offset) { - dtrUpRegister(transferType, 
srcDst, base, op2); - return; - } - - if (offset <= 0xfffff && offset >= -0xfffff) { - add(ARMRegisters::S0, base, op2); - dataTransfer32(transferType, srcDst, ARMRegisters::S0, offset); - return; - } - - moveImm(offset, ARMRegisters::S0); - add(ARMRegisters::S0, ARMRegisters::S0, op2); - dtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); -} - -void ARMAssembler::dataTransfer16(DataTransferTypeB transferType, RegisterID srcDst, RegisterID base, int32_t offset) -{ - if (offset >= 0) { - if (offset <= 0xff) - halfDtrUp(transferType, srcDst, base, getOp2Half(offset)); - else if (offset <= 0xffff) { - add(ARMRegisters::S0, base, Op2Immediate | (offset >> 8) | (12 << 8)); - halfDtrUp(transferType, srcDst, ARMRegisters::S0, getOp2Half(offset & 0xff)); - } else { - moveImm(offset, ARMRegisters::S0); - halfDtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); - } - } else { - if (offset >= -0xff) - halfDtrDown(transferType, srcDst, base, getOp2Half(-offset)); - else if (offset >= -0xffff) { - sub(ARMRegisters::S0, base, Op2Immediate | (-offset >> 8) | (12 << 8)); - halfDtrDown(transferType, srcDst, ARMRegisters::S0, getOp2Half(-offset & 0xff)); - } else { - moveImm(offset, ARMRegisters::S0); - halfDtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); - } - } -} - -void ARMAssembler::baseIndexTransfer16(DataTransferTypeB transferType, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset) -{ - if (!scale && !offset) { - halfDtrUpRegister(transferType, srcDst, base, index); - return; - } - - ARMWord op2 = lsl(index, scale); - - if (offset <= 0xffff && offset >= -0xffff) { - add(ARMRegisters::S0, base, op2); - dataTransfer16(transferType, srcDst, ARMRegisters::S0, offset); - return; - } - - moveImm(offset, ARMRegisters::S0); - add(ARMRegisters::S0, ARMRegisters::S0, op2); - halfDtrUpRegister(transferType, srcDst, base, ARMRegisters::S0); -} - -void ARMAssembler::dataTransferFloat(DataTransferTypeFloat transferType, 
FPRegisterID srcDst, RegisterID base, int32_t offset) -{ - // VFP cannot directly access memory that is not four-byte-aligned - if (!(offset & 0x3)) { - if (offset <= 0x3ff && offset >= 0) { - doubleDtrUp(transferType, srcDst, base, offset >> 2); - return; - } - if (offset <= 0x3ffff && offset >= 0) { - add(ARMRegisters::S0, base, Op2Immediate | (offset >> 10) | (11 << 8)); - doubleDtrUp(transferType, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff); - return; - } - offset = -offset; - - if (offset <= 0x3ff && offset >= 0) { - doubleDtrDown(transferType, srcDst, base, offset >> 2); - return; - } - if (offset <= 0x3ffff && offset >= 0) { - sub(ARMRegisters::S0, base, Op2Immediate | (offset >> 10) | (11 << 8)); - doubleDtrDown(transferType, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff); - return; - } - offset = -offset; - } - - moveImm(offset, ARMRegisters::S0); - add(ARMRegisters::S0, ARMRegisters::S0, base); - doubleDtrUp(transferType, srcDst, ARMRegisters::S0, 0); -} - -void ARMAssembler::baseIndexTransferFloat(DataTransferTypeFloat transferType, FPRegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset) -{ - add(ARMRegisters::S1, base, lsl(index, scale)); - dataTransferFloat(transferType, srcDst, ARMRegisters::S1, offset); -} - -PassRefPtr<ExecutableMemoryHandle> ARMAssembler::executableCopy(JSGlobalData& globalData, void* ownerUID, JITCompilationEffort effort) -{ - // 64-bit alignment is required for next constant pool and JIT code as well - m_buffer.flushWithoutBarrier(true); - if (!m_buffer.isAligned(8)) - bkpt(0); - - RefPtr<ExecutableMemoryHandle> result = m_buffer.executableCopy(globalData, ownerUID, effort); - char* data = reinterpret_cast<char*>(result->start()); - - for (Jumps::Iterator iter = m_jumps.begin(); iter != m_jumps.end(); ++iter) { - // The last bit is set if the constant must be placed on constant pool. 
- int pos = (iter->m_offset) & (~0x1); - ARMWord* ldrAddr = reinterpret_cast_ptr<ARMWord*>(data + pos); - ARMWord* addr = getLdrImmAddress(ldrAddr); - if (*addr != InvalidBranchTarget) { - if (!(iter->m_offset & 1)) { - intptr_t difference = reinterpret_cast_ptr<ARMWord*>(data + *addr) - (ldrAddr + DefaultPrefetchOffset); - - if ((difference <= MaximumBranchOffsetDistance && difference >= MinimumBranchOffsetDistance)) { - *ldrAddr = B | getConditionalField(*ldrAddr) | (difference & BranchOffsetMask); - continue; - } - } - *addr = reinterpret_cast<ARMWord>(data + *addr); - } - } - - return result; -} - -#if OS(LINUX) && COMPILER(RVCT) - -__asm void ARMAssembler::cacheFlush(void* code, size_t size) -{ - ARM - push {r7} - add r1, r1, r0 - mov r7, #0xf0000 - add r7, r7, #0x2 - mov r2, #0x0 - svc #0x0 - pop {r7} - bx lr -} - -#endif - -} // namespace JSC - -#endif // ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) diff --git a/src/3rdparty/masm/assembler/ARMAssembler.h b/src/3rdparty/masm/assembler/ARMAssembler.h deleted file mode 100644 index 16cc25d4db..0000000000 --- a/src/3rdparty/masm/assembler/ARMAssembler.h +++ /dev/null @@ -1,1129 +0,0 @@ -/* - * Copyright (C) 2009, 2010 University of Szeged - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL UNIVERSITY OF SZEGED OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ARMAssembler_h -#define ARMAssembler_h - -#if ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#include "AssemblerBufferWithConstantPool.h" -#include "JITCompilationEffort.h" -#include <wtf/Assertions.h> -namespace JSC { - - typedef uint32_t ARMWord; - - namespace ARMRegisters { - typedef enum { - r0 = 0, - r1, - r2, - r3, - r4, - r5, - r6, S0 = r6, - r7, - r8, - r9, - r10, - r11, - r12, S1 = r12, - r13, sp = r13, - r14, lr = r14, - r15, pc = r15 - } RegisterID; - - typedef enum { - d0, - d1, - d2, - d3, - d4, - d5, - d6, - d7, SD0 = d7, /* Same as thumb assembler. 
*/ - d8, - d9, - d10, - d11, - d12, - d13, - d14, - d15, - d16, - d17, - d18, - d19, - d20, - d21, - d22, - d23, - d24, - d25, - d26, - d27, - d28, - d29, - d30, - d31 - } FPRegisterID; - - } // namespace ARMRegisters - - class ARMAssembler { - public: - typedef ARMRegisters::RegisterID RegisterID; - typedef ARMRegisters::FPRegisterID FPRegisterID; - typedef AssemblerBufferWithConstantPool<2048, 4, 4, ARMAssembler> ARMBuffer; - typedef SegmentedVector<AssemblerLabel, 64> Jumps; - - ARMAssembler() - : m_indexOfTailOfLastWatchpoint(1) - { - } - - // ARM conditional constants - typedef enum { - EQ = 0x00000000, // Zero - NE = 0x10000000, // Non-zero - CS = 0x20000000, - CC = 0x30000000, - MI = 0x40000000, - PL = 0x50000000, - VS = 0x60000000, - VC = 0x70000000, - HI = 0x80000000, - LS = 0x90000000, - GE = 0xa0000000, - LT = 0xb0000000, - GT = 0xc0000000, - LE = 0xd0000000, - AL = 0xe0000000 - } Condition; - - // ARM instruction constants - enum { - AND = (0x0 << 21), - EOR = (0x1 << 21), - SUB = (0x2 << 21), - RSB = (0x3 << 21), - ADD = (0x4 << 21), - ADC = (0x5 << 21), - SBC = (0x6 << 21), - RSC = (0x7 << 21), - TST = (0x8 << 21), - TEQ = (0x9 << 21), - CMP = (0xa << 21), - CMN = (0xb << 21), - ORR = (0xc << 21), - MOV = (0xd << 21), - BIC = (0xe << 21), - MVN = (0xf << 21), - MUL = 0x00000090, - MULL = 0x00c00090, - VMOV_F64 = 0x0eb00b40, - VADD_F64 = 0x0e300b00, - VDIV_F64 = 0x0e800b00, - VSUB_F64 = 0x0e300b40, - VMUL_F64 = 0x0e200b00, - VCMP_F64 = 0x0eb40b40, - VSQRT_F64 = 0x0eb10bc0, - VABS_F64 = 0x0eb00bc0, - VNEG_F64 = 0x0eb10b40, - STMDB = 0x09200000, - LDMIA = 0x08b00000, - B = 0x0a000000, - BL = 0x0b000000, - BX = 0x012fff10, - VMOV_VFP64 = 0x0c400a10, - VMOV_ARM64 = 0x0c500a10, - VMOV_VFP32 = 0x0e000a10, - VMOV_ARM32 = 0x0e100a10, - VCVT_F64_S32 = 0x0eb80bc0, - VCVT_S32_F64 = 0x0ebd0b40, - VCVT_U32_F64 = 0x0ebc0b40, - VCVT_F32_F64 = 0x0eb70bc0, - VCVT_F64_F32 = 0x0eb70ac0, - VMRS_APSR = 0x0ef1fa10, - CLZ = 0x016f0f10, - BKPT = 0xe1200070, - BLX = 
0x012fff30, -#if WTF_ARM_ARCH_AT_LEAST(7) - MOVW = 0x03000000, - MOVT = 0x03400000, -#endif - NOP = 0xe1a00000, - }; - - enum { - Op2Immediate = (1 << 25), - ImmediateForHalfWordTransfer = (1 << 22), - Op2InvertedImmediate = (1 << 26), - SetConditionalCodes = (1 << 20), - Op2IsRegisterArgument = (1 << 25), - // Data transfer flags. - DataTransferUp = (1 << 23), - DataTransferWriteBack = (1 << 21), - DataTransferPostUpdate = (1 << 24), - DataTransferLoad = (1 << 20), - ByteDataTransfer = (1 << 22), - }; - - enum DataTransferTypeA { - LoadUint32 = 0x05000000 | DataTransferLoad, - LoadUint8 = 0x05400000 | DataTransferLoad, - StoreUint32 = 0x05000000, - StoreUint8 = 0x05400000, - }; - - enum DataTransferTypeB { - LoadUint16 = 0x010000b0 | DataTransferLoad, - LoadInt16 = 0x010000f0 | DataTransferLoad, - LoadInt8 = 0x010000d0 | DataTransferLoad, - StoreUint16 = 0x010000b0, - }; - - enum DataTransferTypeFloat { - LoadFloat = 0x0d000a00 | DataTransferLoad, - LoadDouble = 0x0d000b00 | DataTransferLoad, - StoreFloat = 0x0d000a00, - StoreDouble = 0x0d000b00, - }; - - // Masks of ARM instructions - enum { - BranchOffsetMask = 0x00ffffff, - ConditionalFieldMask = 0xf0000000, - DataTransferOffsetMask = 0xfff, - }; - - enum { - MinimumBranchOffsetDistance = -0x00800000, - MaximumBranchOffsetDistance = 0x007fffff, - }; - - enum { - padForAlign8 = 0x00, - padForAlign16 = 0x0000, - padForAlign32 = 0xe12fff7f // 'bkpt 0xffff' instruction. 
- }; - - static const ARMWord InvalidImmediate = 0xf0000000; - static const ARMWord InvalidBranchTarget = 0xffffffff; - static const int DefaultPrefetchOffset = 2; - - static const ARMWord BlxInstructionMask = 0x012fff30; - static const ARMWord LdrOrAddInstructionMask = 0x0ff00000; - static const ARMWord LdrPcImmediateInstructionMask = 0x0f7f0000; - - static const ARMWord AddImmediateInstruction = 0x02800000; - static const ARMWord BlxInstruction = 0x012fff30; - static const ARMWord LdrImmediateInstruction = 0x05900000; - static const ARMWord LdrPcImmediateInstruction = 0x051f0000; - - // Instruction formating - - void emitInstruction(ARMWord op, int rd, int rn, ARMWord op2) - { - ASSERT(((op2 & ~Op2Immediate) <= 0xfff) || (((op2 & ~ImmediateForHalfWordTransfer) <= 0xfff))); - m_buffer.putInt(op | RN(rn) | RD(rd) | op2); - } - - void emitDoublePrecisionInstruction(ARMWord op, int dd, int dn, int dm) - { - ASSERT((dd >= 0 && dd <= 31) && (dn >= 0 && dn <= 31) && (dm >= 0 && dm <= 31)); - m_buffer.putInt(op | ((dd & 0xf) << 12) | ((dd & 0x10) << (22 - 4)) - | ((dn & 0xf) << 16) | ((dn & 0x10) << (7 - 4)) - | (dm & 0xf) | ((dm & 0x10) << (5 - 4))); - } - - void emitSinglePrecisionInstruction(ARMWord op, int sd, int sn, int sm) - { - ASSERT((sd >= 0 && sd <= 31) && (sn >= 0 && sn <= 31) && (sm >= 0 && sm <= 31)); - m_buffer.putInt(op | ((sd >> 1) << 12) | ((sd & 0x1) << 22) - | ((sn >> 1) << 16) | ((sn & 0x1) << 7) - | (sm >> 1) | ((sm & 0x1) << 5)); - } - - void bitAnd(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | AND, rd, rn, op2); - } - - void bitAnds(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | AND | SetConditionalCodes, rd, rn, op2); - } - - void eor(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | EOR, rd, rn, op2); - } - - void eors(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | EOR | SetConditionalCodes, 
rd, rn, op2); - } - - void sub(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | SUB, rd, rn, op2); - } - - void subs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | SUB | SetConditionalCodes, rd, rn, op2); - } - - void rsb(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | RSB, rd, rn, op2); - } - - void rsbs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | RSB | SetConditionalCodes, rd, rn, op2); - } - - void add(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ADD, rd, rn, op2); - } - - void adds(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ADD | SetConditionalCodes, rd, rn, op2); - } - - void adc(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ADC, rd, rn, op2); - } - - void adcs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ADC | SetConditionalCodes, rd, rn, op2); - } - - void sbc(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | SBC, rd, rn, op2); - } - - void sbcs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | SBC | SetConditionalCodes, rd, rn, op2); - } - - void rsc(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | RSC, rd, rn, op2); - } - - void rscs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | RSC | SetConditionalCodes, rd, rn, op2); - } - - void tst(int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | TST | SetConditionalCodes, 0, rn, op2); - } - - void teq(int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | TEQ | SetConditionalCodes, 0, rn, op2); - } - - void cmp(int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | CMP | 
SetConditionalCodes, 0, rn, op2); - } - - void cmn(int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | CMN | SetConditionalCodes, 0, rn, op2); - } - - void orr(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ORR, rd, rn, op2); - } - - void orrs(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | ORR | SetConditionalCodes, rd, rn, op2); - } - - void mov(int rd, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | MOV, rd, ARMRegisters::r0, op2); - } - -#if WTF_ARM_ARCH_AT_LEAST(7) - void movw(int rd, ARMWord op2, Condition cc = AL) - { - ASSERT((op2 | 0xf0fff) == 0xf0fff); - m_buffer.putInt(toARMWord(cc) | MOVW | RD(rd) | op2); - } - - void movt(int rd, ARMWord op2, Condition cc = AL) - { - ASSERT((op2 | 0xf0fff) == 0xf0fff); - m_buffer.putInt(toARMWord(cc) | MOVT | RD(rd) | op2); - } -#endif - - void movs(int rd, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | MOV | SetConditionalCodes, rd, ARMRegisters::r0, op2); - } - - void bic(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | BIC, rd, rn, op2); - } - - void bics(int rd, int rn, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | BIC | SetConditionalCodes, rd, rn, op2); - } - - void mvn(int rd, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | MVN, rd, ARMRegisters::r0, op2); - } - - void mvns(int rd, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | MVN | SetConditionalCodes, rd, ARMRegisters::r0, op2); - } - - void mul(int rd, int rn, int rm, Condition cc = AL) - { - m_buffer.putInt(toARMWord(cc) | MUL | RN(rd) | RS(rn) | RM(rm)); - } - - void muls(int rd, int rn, int rm, Condition cc = AL) - { - m_buffer.putInt(toARMWord(cc) | MUL | SetConditionalCodes | RN(rd) | RS(rn) | RM(rm)); - } - - void mull(int rdhi, int rdlo, int rn, int rm, Condition cc = AL) - { - 
m_buffer.putInt(toARMWord(cc) | MULL | RN(rdhi) | RD(rdlo) | RS(rn) | RM(rm)); - } - - void vmov_f64(int dd, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VMOV_F64, dd, 0, dm); - } - - void vadd_f64(int dd, int dn, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VADD_F64, dd, dn, dm); - } - - void vdiv_f64(int dd, int dn, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VDIV_F64, dd, dn, dm); - } - - void vsub_f64(int dd, int dn, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VSUB_F64, dd, dn, dm); - } - - void vmul_f64(int dd, int dn, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VMUL_F64, dd, dn, dm); - } - - void vcmp_f64(int dd, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VCMP_F64, dd, 0, dm); - } - - void vsqrt_f64(int dd, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VSQRT_F64, dd, 0, dm); - } - - void vabs_f64(int dd, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VABS_F64, dd, 0, dm); - } - - void vneg_f64(int dd, int dm, Condition cc = AL) - { - emitDoublePrecisionInstruction(toARMWord(cc) | VNEG_F64, dd, 0, dm); - } - - void ldrImmediate(int rd, ARMWord imm, Condition cc = AL) - { - m_buffer.putIntWithConstantInt(toARMWord(cc) | LoadUint32 | DataTransferUp | RN(ARMRegisters::pc) | RD(rd), imm, true); - } - - void ldrUniqueImmediate(int rd, ARMWord imm, Condition cc = AL) - { - m_buffer.putIntWithConstantInt(toARMWord(cc) | LoadUint32 | DataTransferUp | RN(ARMRegisters::pc) | RD(rd), imm); - } - - void dtrUp(DataTransferTypeA transferType, int rd, int rb, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType | DataTransferUp, rd, rb, op2); - } - - void dtrUpRegister(DataTransferTypeA transferType, int rd, int rb, int rm, Condition cc = AL) - { - 
emitInstruction(toARMWord(cc) | transferType | DataTransferUp | Op2IsRegisterArgument, rd, rb, rm); - } - - void dtrDown(DataTransferTypeA transferType, int rd, int rb, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType, rd, rb, op2); - } - - void dtrDownRegister(DataTransferTypeA transferType, int rd, int rb, int rm, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType | Op2IsRegisterArgument, rd, rb, rm); - } - - void halfDtrUp(DataTransferTypeB transferType, int rd, int rb, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType | DataTransferUp, rd, rb, op2); - } - - void halfDtrUpRegister(DataTransferTypeB transferType, int rd, int rn, int rm, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType | DataTransferUp, rd, rn, rm); - } - - void halfDtrDown(DataTransferTypeB transferType, int rd, int rb, ARMWord op2, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType, rd, rb, op2); - } - - void halfDtrDownRegister(DataTransferTypeB transferType, int rd, int rn, int rm, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | transferType, rd, rn, rm); - } - - void doubleDtrUp(DataTransferTypeFloat type, int rd, int rb, ARMWord op2, Condition cc = AL) - { - ASSERT(op2 <= 0xff && rd <= 15); - /* Only d0-d15 and s0, s2, s4 ... s30 are supported. */ - m_buffer.putInt(toARMWord(cc) | DataTransferUp | type | (rd << 12) | RN(rb) | op2); - } - - void doubleDtrDown(DataTransferTypeFloat type, int rd, int rb, ARMWord op2, Condition cc = AL) - { - ASSERT(op2 <= 0xff && rd <= 15); - /* Only d0-d15 and s0, s2, s4 ... s30 are supported. 
*/ - m_buffer.putInt(toARMWord(cc) | type | (rd << 12) | RN(rb) | op2); - } - - void push(int reg, Condition cc = AL) - { - ASSERT(ARMWord(reg) <= 0xf); - m_buffer.putInt(toARMWord(cc) | StoreUint32 | DataTransferWriteBack | RN(ARMRegisters::sp) | RD(reg) | 0x4); - } - - void pop(int reg, Condition cc = AL) - { - ASSERT(ARMWord(reg) <= 0xf); - m_buffer.putInt(toARMWord(cc) | (LoadUint32 ^ DataTransferPostUpdate) | DataTransferUp | RN(ARMRegisters::sp) | RD(reg) | 0x4); - } - - inline void poke(int reg, Condition cc = AL) - { - dtrDown(StoreUint32, ARMRegisters::sp, 0, reg, cc); - } - - inline void peek(int reg, Condition cc = AL) - { - dtrUp(LoadUint32, reg, ARMRegisters::sp, 0, cc); - } - - void vmov_vfp64(int sm, int rt, int rt2, Condition cc = AL) - { - ASSERT(rt != rt2); - m_buffer.putInt(toARMWord(cc) | VMOV_VFP64 | RN(rt2) | RD(rt) | (sm & 0xf) | ((sm & 0x10) << (5 - 4))); - } - - void vmov_arm64(int rt, int rt2, int sm, Condition cc = AL) - { - ASSERT(rt != rt2); - m_buffer.putInt(toARMWord(cc) | VMOV_ARM64 | RN(rt2) | RD(rt) | (sm & 0xf) | ((sm & 0x10) << (5 - 4))); - } - - void vmov_vfp32(int sn, int rt, Condition cc = AL) - { - ASSERT(rt <= 15); - emitSinglePrecisionInstruction(toARMWord(cc) | VMOV_VFP32, rt << 1, sn, 0); - } - - void vmov_arm32(int rt, int sn, Condition cc = AL) - { - ASSERT(rt <= 15); - emitSinglePrecisionInstruction(toARMWord(cc) | VMOV_ARM32, rt << 1, sn, 0); - } - - void vcvt_f64_s32(int dd, int sm, Condition cc = AL) - { - ASSERT(!(sm & 0x1)); // sm must be divisible by 2 - emitDoublePrecisionInstruction(toARMWord(cc) | VCVT_F64_S32, dd, 0, (sm >> 1)); - } - - void vcvt_s32_f64(int sd, int dm, Condition cc = AL) - { - ASSERT(!(sd & 0x1)); // sd must be divisible by 2 - emitDoublePrecisionInstruction(toARMWord(cc) | VCVT_S32_F64, (sd >> 1), 0, dm); - } - - void vcvt_u32_f64(int sd, int dm, Condition cc = AL) - { - ASSERT(!(sd & 0x1)); // sd must be divisible by 2 - emitDoublePrecisionInstruction(toARMWord(cc) | VCVT_U32_F64, (sd >> 
1), 0, dm); - } - - void vcvt_f64_f32(int dd, int sm, Condition cc = AL) - { - ASSERT(dd <= 15 && sm <= 15); - emitDoublePrecisionInstruction(toARMWord(cc) | VCVT_F64_F32, dd, 0, sm); - } - - void vcvt_f32_f64(int dd, int sm, Condition cc = AL) - { - ASSERT(dd <= 15 && sm <= 15); - emitDoublePrecisionInstruction(toARMWord(cc) | VCVT_F32_F64, dd, 0, sm); - } - - void vmrs_apsr(Condition cc = AL) - { - m_buffer.putInt(toARMWord(cc) | VMRS_APSR); - } - - void clz(int rd, int rm, Condition cc = AL) - { - m_buffer.putInt(toARMWord(cc) | CLZ | RD(rd) | RM(rm)); - } - - void bkpt(ARMWord value) - { - m_buffer.putInt(BKPT | ((value & 0xff0) << 4) | (value & 0xf)); - } - - void nop() - { - m_buffer.putInt(NOP); - } - - void bx(int rm, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | BX, 0, 0, RM(rm)); - } - - AssemblerLabel blx(int rm, Condition cc = AL) - { - emitInstruction(toARMWord(cc) | BLX, 0, 0, RM(rm)); - return m_buffer.label(); - } - - static ARMWord lsl(int reg, ARMWord value) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(value <= 0x1f); - return reg | (value << 7) | 0x00; - } - - static ARMWord lsr(int reg, ARMWord value) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(value <= 0x1f); - return reg | (value << 7) | 0x20; - } - - static ARMWord asr(int reg, ARMWord value) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(value <= 0x1f); - return reg | (value << 7) | 0x40; - } - - static ARMWord lslRegister(int reg, int shiftReg) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(shiftReg <= ARMRegisters::pc); - return reg | (shiftReg << 8) | 0x10; - } - - static ARMWord lsrRegister(int reg, int shiftReg) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(shiftReg <= ARMRegisters::pc); - return reg | (shiftReg << 8) | 0x30; - } - - static ARMWord asrRegister(int reg, int shiftReg) - { - ASSERT(reg <= ARMRegisters::pc); - ASSERT(shiftReg <= ARMRegisters::pc); - return reg | (shiftReg << 8) | 0x50; - } - - // General helpers - - size_t codeSize() const - { - return 
m_buffer.codeSize(); - } - - void ensureSpace(int insnSpace, int constSpace) - { - m_buffer.ensureSpace(insnSpace, constSpace); - } - - int sizeOfConstantPool() - { - return m_buffer.sizeOfConstantPool(); - } - - AssemblerLabel labelIgnoringWatchpoints() - { - m_buffer.ensureSpaceForAnyInstruction(); - return m_buffer.label(); - } - - AssemblerLabel labelForWatchpoint() - { - m_buffer.ensureSpaceForAnyInstruction(maxJumpReplacementSize() / sizeof(ARMWord)); - AssemblerLabel result = m_buffer.label(); - if (result.m_offset != (m_indexOfTailOfLastWatchpoint - maxJumpReplacementSize())) - result = label(); - m_indexOfTailOfLastWatchpoint = result.m_offset + maxJumpReplacementSize(); - return label(); - } - - AssemblerLabel label() - { - AssemblerLabel result = labelIgnoringWatchpoints(); - while (result.m_offset + 1 < m_indexOfTailOfLastWatchpoint) { - nop(); - // The available number of instructions are ensured by labelForWatchpoint. - result = m_buffer.label(); - } - return result; - } - - AssemblerLabel align(int alignment) - { - while (!m_buffer.isAligned(alignment)) - mov(ARMRegisters::r0, ARMRegisters::r0); - - return label(); - } - - AssemblerLabel loadBranchTarget(int rd, Condition cc = AL, int useConstantPool = 0) - { - ensureSpace(sizeof(ARMWord), sizeof(ARMWord)); - m_jumps.append(m_buffer.codeSize() | (useConstantPool & 0x1)); - ldrUniqueImmediate(rd, InvalidBranchTarget, cc); - return m_buffer.label(); - } - - AssemblerLabel jmp(Condition cc = AL, int useConstantPool = 0) - { - return loadBranchTarget(ARMRegisters::pc, cc, useConstantPool); - } - - PassRefPtr<ExecutableMemoryHandle> executableCopy(JSGlobalData&, void* ownerUID, JITCompilationEffort); - - unsigned debugOffset() { return m_buffer.debugOffset(); } - - // DFG assembly helpers for moving data between fp and registers. 
- void vmov(RegisterID rd1, RegisterID rd2, FPRegisterID rn) - { - vmov_arm64(rd1, rd2, rn); - } - - void vmov(FPRegisterID rd, RegisterID rn1, RegisterID rn2) - { - vmov_vfp64(rd, rn1, rn2); - } - - // Patching helpers - - static ARMWord* getLdrImmAddress(ARMWord* insn) - { - // Check for call - if ((*insn & LdrPcImmediateInstructionMask) != LdrPcImmediateInstruction) { - // Must be BLX - ASSERT((*insn & BlxInstructionMask) == BlxInstruction); - insn--; - } - - // Must be an ldr ..., [pc +/- imm] - ASSERT((*insn & LdrPcImmediateInstructionMask) == LdrPcImmediateInstruction); - - ARMWord addr = reinterpret_cast<ARMWord>(insn) + DefaultPrefetchOffset * sizeof(ARMWord); - if (*insn & DataTransferUp) - return reinterpret_cast<ARMWord*>(addr + (*insn & DataTransferOffsetMask)); - return reinterpret_cast<ARMWord*>(addr - (*insn & DataTransferOffsetMask)); - } - - static ARMWord* getLdrImmAddressOnPool(ARMWord* insn, uint32_t* constPool) - { - // Must be an ldr ..., [pc +/- imm] - ASSERT((*insn & LdrPcImmediateInstructionMask) == LdrPcImmediateInstruction); - - if (*insn & 0x1) - return reinterpret_cast<ARMWord*>(constPool + ((*insn & DataTransferOffsetMask) >> 1)); - return getLdrImmAddress(insn); - } - - static void patchPointerInternal(intptr_t from, void* to) - { - ARMWord* insn = reinterpret_cast<ARMWord*>(from); - ARMWord* addr = getLdrImmAddress(insn); - *addr = reinterpret_cast<ARMWord>(to); - } - - static ARMWord patchConstantPoolLoad(ARMWord load, ARMWord value) - { - value = (value << 1) + 1; - ASSERT(!(value & ~DataTransferOffsetMask)); - return (load & ~DataTransferOffsetMask) | value; - } - - static void patchConstantPoolLoad(void* loadAddr, void* constPoolAddr); - - // Read pointers - static void* readPointer(void* from) - { - ARMWord* instruction = reinterpret_cast<ARMWord*>(from); - ARMWord* address = getLdrImmAddress(instruction); - return *reinterpret_cast<void**>(address); - } - - // Patch pointers - - static void linkPointer(void* code, 
AssemblerLabel from, void* to) - { - patchPointerInternal(reinterpret_cast<intptr_t>(code) + from.m_offset, to); - } - - static void repatchInt32(void* where, int32_t to) - { - patchPointerInternal(reinterpret_cast<intptr_t>(where), reinterpret_cast<void*>(to)); - } - - static void repatchCompact(void* where, int32_t value) - { - ARMWord* instruction = reinterpret_cast<ARMWord*>(where); - ASSERT((*instruction & 0x0f700000) == LoadUint32); - if (value >= 0) - *instruction = (*instruction & 0xff7ff000) | DataTransferUp | value; - else - *instruction = (*instruction & 0xff7ff000) | -value; - cacheFlush(instruction, sizeof(ARMWord)); - } - - static void repatchPointer(void* from, void* to) - { - patchPointerInternal(reinterpret_cast<intptr_t>(from), to); - } - - // Linkers - static intptr_t getAbsoluteJumpAddress(void* base, int offset = 0) - { - return reinterpret_cast<intptr_t>(base) + offset - sizeof(ARMWord); - } - - void linkJump(AssemblerLabel from, AssemblerLabel to) - { - ARMWord* insn = reinterpret_cast<ARMWord*>(getAbsoluteJumpAddress(m_buffer.data(), from.m_offset)); - ARMWord* addr = getLdrImmAddressOnPool(insn, m_buffer.poolAddress()); - *addr = toARMWord(to.m_offset); - } - - static void linkJump(void* code, AssemblerLabel from, void* to) - { - patchPointerInternal(getAbsoluteJumpAddress(code, from.m_offset), to); - } - - static void relinkJump(void* from, void* to) - { - patchPointerInternal(getAbsoluteJumpAddress(from), to); - } - - static void linkCall(void* code, AssemblerLabel from, void* to) - { - patchPointerInternal(getAbsoluteJumpAddress(code, from.m_offset), to); - } - - static void relinkCall(void* from, void* to) - { - patchPointerInternal(getAbsoluteJumpAddress(from), to); - } - - static void* readCallTarget(void* from) - { - return reinterpret_cast<void*>(readPointer(reinterpret_cast<void*>(getAbsoluteJumpAddress(from)))); - } - - static void replaceWithJump(void* instructionStart, void* to) - { - ARMWord* instruction = 
reinterpret_cast<ARMWord*>(instructionStart); - intptr_t difference = reinterpret_cast<intptr_t>(to) - (reinterpret_cast<intptr_t>(instruction) + DefaultPrefetchOffset * sizeof(ARMWord)); - - if (!(difference & 1)) { - difference >>= 2; - if ((difference <= MaximumBranchOffsetDistance && difference >= MinimumBranchOffsetDistance)) { - // Direct branch. - instruction[0] = B | AL | (difference & BranchOffsetMask); - cacheFlush(instruction, sizeof(ARMWord)); - return; - } - } - - // Load target. - instruction[0] = LoadUint32 | AL | RN(ARMRegisters::pc) | RD(ARMRegisters::pc) | 4; - instruction[1] = reinterpret_cast<ARMWord>(to); - cacheFlush(instruction, sizeof(ARMWord) * 2); - } - - static ptrdiff_t maxJumpReplacementSize() - { - return sizeof(ARMWord) * 2; - } - - static void replaceWithLoad(void* instructionStart) - { - ARMWord* instruction = reinterpret_cast<ARMWord*>(instructionStart); - cacheFlush(instruction, sizeof(ARMWord)); - - ASSERT((*instruction & LdrOrAddInstructionMask) == AddImmediateInstruction || (*instruction & LdrOrAddInstructionMask) == LdrImmediateInstruction); - if ((*instruction & LdrOrAddInstructionMask) == AddImmediateInstruction) { - *instruction = (*instruction & ~LdrOrAddInstructionMask) | LdrImmediateInstruction; - cacheFlush(instruction, sizeof(ARMWord)); - } - } - - static void replaceWithAddressComputation(void* instructionStart) - { - ARMWord* instruction = reinterpret_cast<ARMWord*>(instructionStart); - cacheFlush(instruction, sizeof(ARMWord)); - - ASSERT((*instruction & LdrOrAddInstructionMask) == AddImmediateInstruction || (*instruction & LdrOrAddInstructionMask) == LdrImmediateInstruction); - if ((*instruction & LdrOrAddInstructionMask) == LdrImmediateInstruction) { - *instruction = (*instruction & ~LdrOrAddInstructionMask) | AddImmediateInstruction; - cacheFlush(instruction, sizeof(ARMWord)); - } - } - - static void revertBranchPtrWithPatch(void* instructionStart, RegisterID rn, ARMWord imm) - { - ARMWord* instruction = 
reinterpret_cast<ARMWord*>(instructionStart); - - ASSERT((instruction[2] & LdrPcImmediateInstructionMask) == LdrPcImmediateInstruction); - instruction[0] = toARMWord(AL) | ((instruction[2] & 0x0fff0fff) + sizeof(ARMWord)) | RD(ARMRegisters::S1); - *getLdrImmAddress(instruction) = imm; - instruction[1] = toARMWord(AL) | CMP | SetConditionalCodes | RN(rn) | RM(ARMRegisters::S1); - cacheFlush(instruction, 2 * sizeof(ARMWord)); - } - - // Address operations - - static void* getRelocatedAddress(void* code, AssemblerLabel label) - { - return reinterpret_cast<void*>(reinterpret_cast<char*>(code) + label.m_offset); - } - - // Address differences - - static int getDifferenceBetweenLabels(AssemblerLabel a, AssemblerLabel b) - { - return b.m_offset - a.m_offset; - } - - static unsigned getCallReturnOffset(AssemblerLabel call) - { - return call.m_offset; - } - - // Handle immediates - - static ARMWord getOp2(ARMWord imm); - - // Fast case if imm is known to be between 0 and 0xff - static ARMWord getOp2Byte(ARMWord imm) - { - ASSERT(imm <= 0xff); - return Op2Immediate | imm; - } - - static ARMWord getOp2Half(ARMWord imm) - { - ASSERT(imm <= 0xff); - return ImmediateForHalfWordTransfer | (imm & 0x0f) | ((imm & 0xf0) << 4); - } - -#if WTF_ARM_ARCH_AT_LEAST(7) - static ARMWord getImm16Op2(ARMWord imm) - { - if (imm <= 0xffff) - return (imm & 0xf000) << 4 | (imm & 0xfff); - return InvalidImmediate; - } -#endif - ARMWord getImm(ARMWord imm, int tmpReg, bool invert = false); - void moveImm(ARMWord imm, int dest); - ARMWord encodeComplexImm(ARMWord imm, int dest); - - // Memory load/store helpers - - void dataTransfer32(DataTransferTypeA, RegisterID srcDst, RegisterID base, int32_t offset); - void baseIndexTransfer32(DataTransferTypeA, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset); - void dataTransfer16(DataTransferTypeB, RegisterID srcDst, RegisterID base, int32_t offset); - void baseIndexTransfer16(DataTransferTypeB, RegisterID srcDst, RegisterID 
base, RegisterID index, int scale, int32_t offset); - void dataTransferFloat(DataTransferTypeFloat, FPRegisterID srcDst, RegisterID base, int32_t offset); - void baseIndexTransferFloat(DataTransferTypeFloat, FPRegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset); - - // Constant pool hnadlers - - static ARMWord placeConstantPoolBarrier(int offset) - { - offset = (offset - sizeof(ARMWord)) >> 2; - ASSERT((offset <= MaximumBranchOffsetDistance && offset >= MinimumBranchOffsetDistance)); - return AL | B | (offset & BranchOffsetMask); - } - -#if OS(LINUX) && COMPILER(GCC) - static inline void linuxPageFlush(uintptr_t begin, uintptr_t end) - { - asm volatile( - "push {r7}\n" - "mov r0, %0\n" - "mov r1, %1\n" - "mov r7, #0xf0000\n" - "add r7, r7, #0x2\n" - "mov r2, #0x0\n" - "svc 0x0\n" - "pop {r7}\n" - : - : "r" (begin), "r" (end) - : "r0", "r1", "r2"); - } -#endif - -#if OS(LINUX) && COMPILER(RVCT) - static __asm void cacheFlush(void* code, size_t); -#else - static void cacheFlush(void* code, size_t size) - { -#if OS(LINUX) && COMPILER(GCC) - size_t page = pageSize(); - uintptr_t current = reinterpret_cast<uintptr_t>(code); - uintptr_t end = current + size; - uintptr_t firstPageEnd = (current & ~(page - 1)) + page; - - if (end <= firstPageEnd) { - linuxPageFlush(current, end); - return; - } - - linuxPageFlush(current, firstPageEnd); - - for (current = firstPageEnd; current + page < end; current += page) - linuxPageFlush(current, current + page); - - linuxPageFlush(current, end); -#elif OS(WINCE) - CacheRangeFlush(code, size, CACHE_SYNC_ALL); -#elif OS(QNX) && ENABLE(ASSEMBLER_WX_EXCLUSIVE) - UNUSED_PARAM(code); - UNUSED_PARAM(size); -#elif OS(QNX) - msync(code, size, MS_INVALIDATE_ICACHE); -#else -#error "The cacheFlush support is missing on this platform." 
-#endif - } -#endif - - private: - static ARMWord RM(int reg) - { - ASSERT(reg <= ARMRegisters::pc); - return reg; - } - - static ARMWord RS(int reg) - { - ASSERT(reg <= ARMRegisters::pc); - return reg << 8; - } - - static ARMWord RD(int reg) - { - ASSERT(reg <= ARMRegisters::pc); - return reg << 12; - } - - static ARMWord RN(int reg) - { - ASSERT(reg <= ARMRegisters::pc); - return reg << 16; - } - - static ARMWord getConditionalField(ARMWord i) - { - return i & ConditionalFieldMask; - } - - static ARMWord toARMWord(Condition cc) - { - return static_cast<ARMWord>(cc); - } - - static ARMWord toARMWord(uint32_t u) - { - return static_cast<ARMWord>(u); - } - - int genInt(int reg, ARMWord imm, bool positive); - - ARMBuffer m_buffer; - Jumps m_jumps; - uint32_t m_indexOfTailOfLastWatchpoint; - }; - -} // namespace JSC - -#endif // ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#endif // ARMAssembler_h diff --git a/src/3rdparty/masm/assembler/AbstractMacroAssembler.h b/src/3rdparty/masm/assembler/AbstractMacroAssembler.h index 1076384900..d0c1c4613e 100644 --- a/src/3rdparty/masm/assembler/AbstractMacroAssembler.h +++ b/src/3rdparty/masm/assembler/AbstractMacroAssembler.h @@ -563,12 +563,6 @@ public: { ASSERT((type == ARM64Assembler::JumpTestBit) || (type == ARM64Assembler::JumpTestBitFixedSize)); } -#elif CPU(SH4) - Jump(AssemblerLabel jmp, SH4Assembler::JumpType type = SH4Assembler::JumpFar) - : m_label(jmp) - , m_type(type) - { - } #else Jump(AssemblerLabel jmp) : m_label(jmp) @@ -598,8 +592,6 @@ public: masm->m_assembler.linkJump(m_label, masm->m_assembler.label(), m_type, m_condition, m_bitNumber, m_compareRegister); else masm->m_assembler.linkJump(m_label, masm->m_assembler.label(), m_type, m_condition); -#elif CPU(SH4) - masm->m_assembler.linkJump(m_label, masm->m_assembler.label(), m_type); #else masm->m_assembler.linkJump(m_label, masm->m_assembler.label()); #endif @@ -640,9 +632,6 @@ public: unsigned m_bitNumber; ARM64Assembler::RegisterID m_compareRegister; 
#endif -#if CPU(SH4) - SH4Assembler::JumpType m_type; -#endif }; #endif diff --git a/src/3rdparty/masm/assembler/AssemblerBuffer.h b/src/3rdparty/masm/assembler/AssemblerBuffer.h index 277ec1043c..45874235b6 100644 --- a/src/3rdparty/masm/assembler/AssemblerBuffer.h +++ b/src/3rdparty/masm/assembler/AssemblerBuffer.h @@ -102,7 +102,7 @@ namespace JSC { void putIntegralUnchecked(IntegralType value) { ASSERT(isAvailable(sizeof(IntegralType))); - *reinterpret_cast_ptr<IntegralType*>(m_buffer + m_index) = value; + memcpy(m_buffer + m_index, &value, sizeof(IntegralType)); m_index += sizeof(IntegralType); } diff --git a/src/3rdparty/masm/assembler/LinkBuffer.cpp b/src/3rdparty/masm/assembler/LinkBuffer.cpp index 74c278135b..44a11706c9 100644 --- a/src/3rdparty/masm/assembler/LinkBuffer.cpp +++ b/src/3rdparty/masm/assembler/LinkBuffer.cpp @@ -75,23 +75,6 @@ void LinkBuffer::dumpCode(void* code, size_t size) for (unsigned i = 0; i < tsize; i++) dataLogF("\t.short\t0x%x\n", tcode[i]); -#elif CPU(ARM_TRADITIONAL) - // gcc -c jit.s - // objdump -D jit.o - static unsigned codeCount = 0; - unsigned int* tcode = static_cast<unsigned int*>(code); - size_t tsize = size / sizeof(unsigned int); - char nameBuf[128]; - snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++); - dataLogF("\t.globl\t%s\n" - "\t.align 4\n" - "\t.code 32\n" - "\t.text\n" - "# %p\n" - "%s:\n", nameBuf, code, nameBuf); - - for (unsigned i = 0; i < tsize; i++) - dataLogF("\t.long\t0x%x\n", tcode[i]); #endif } #endif diff --git a/src/3rdparty/masm/assembler/LinkBuffer.h b/src/3rdparty/masm/assembler/LinkBuffer.h index bfd0e402ca..c79b0663c8 100644 --- a/src/3rdparty/masm/assembler/LinkBuffer.h +++ b/src/3rdparty/masm/assembler/LinkBuffer.h @@ -26,6 +26,8 @@ #ifndef LinkBuffer_h #define LinkBuffer_h +#include <Platform.h> + #if ENABLE(ASSEMBLER) #define DUMP_LINK_STATISTICS 0 @@ -66,7 +68,7 @@ struct DefaultExecutableOffsetCalculator { // template <typename MacroAssembler, template <typename T> class 
ExecutableOffsetCalculator> class LinkBufferBase { - WTF_MAKE_NONCOPYABLE(LinkBufferBase); + WTF_MAKE_NONCOPYABLE(LinkBufferBase) typedef MacroAssemblerCodeRef CodeRef; typedef MacroAssemblerCodePtr CodePtr; typedef typename MacroAssembler::Label Label; @@ -209,7 +211,7 @@ public: // displaying disassembly. inline CodeRef finalizeCodeWithoutDisassembly(); - inline CodeRef finalizeCodeWithDisassembly(const char* format, ...) WTF_ATTRIBUTE_PRINTF(2, 3); + inline CodeRef finalizeCodeWithDisassembly(const char *jitKind, const char* format, ...) WTF_ATTRIBUTE_PRINTF(3, 4); CodePtr trampolineAt(Label label) { @@ -263,9 +265,9 @@ protected: #endif }; -#define FINALIZE_CODE_IF(condition, linkBufferReference, dataLogFArgumentsForHeading) \ +#define FINALIZE_CODE_IF(condition, linkBufferReference, jitKind, dataLogFArgumentsForHeading) \ (UNLIKELY((condition)) \ - ? ((linkBufferReference).finalizeCodeWithDisassembly dataLogFArgumentsForHeading) \ + ? ((linkBufferReference).finalizeCodeWithDisassembly (jitKind, dataLogFArgumentsForHeading)) \ : (linkBufferReference).finalizeCodeWithoutDisassembly()) // Use this to finalize code, like so: @@ -284,11 +286,11 @@ protected: // Note that the dataLogFArgumentsForHeading are only evaluated when showDisassembly // is true, so you can hide expensive disassembly-only computations inside there. 
-#define FINALIZE_CODE(linkBufferReference, dataLogFArgumentsForHeading) \ - FINALIZE_CODE_IF(Options::showDisassembly(), linkBufferReference, dataLogFArgumentsForHeading) +#define FINALIZE_CODE(linkBufferReference, jitKind, dataLogFArgumentsForHeading) \ + FINALIZE_CODE_IF(Options::showDisassembly(), linkBufferReference, jitKind, dataLogFArgumentsForHeading) -#define FINALIZE_DFG_CODE(linkBufferReference, dataLogFArgumentsForHeading) \ - FINALIZE_CODE_IF((Options::showDisassembly() || Options::showDFGDisassembly()), linkBufferReference, dataLogFArgumentsForHeading) +#define FINALIZE_DFG_CODE(linkBufferReference, jitKind, dataLogFArgumentsForHeading) \ + FINALIZE_CODE_IF((Options::showDisassembly() || Options::showDFGDisassembly()), linkBufferReference, jitKind, dataLogFArgumentsForHeading) template <typename MacroAssembler, template <typename T> class ExecutableOffsetCalculator> @@ -300,13 +302,13 @@ inline typename LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::Code } template <typename MacroAssembler, template <typename T> class ExecutableOffsetCalculator> -inline typename LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::CodeRef LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::finalizeCodeWithDisassembly(const char* format, ...) +inline typename LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::CodeRef LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::finalizeCodeWithDisassembly(const char *jitKind, const char* format, ...) 
{ ASSERT(Options::showDisassembly() || Options::showDFGDisassembly()); CodeRef result = finalizeCodeWithoutDisassembly(); - dataLogF("Generated JIT code for "); + dataLogF("Generated %s code for ", jitKind); va_list argList; va_start(argList, format); WTF::dataLogFV(format, argList); @@ -350,8 +352,8 @@ inline void LinkBufferBase<MacroAssembler, ExecutableOffsetCalculator>::performF #endif ASSERT(m_size <= INT_MAX); - ExecutableAllocator::makeExecutable(code(), static_cast<int>(m_size)); MacroAssembler::cacheFlush(code(), m_size); + ExecutableAllocator::makeExecutable(code(), static_cast<int>(m_size)); } template <typename MacroAssembler> @@ -418,8 +420,8 @@ inline void BranchCompactingLinkBuffer<MacroAssembler>::performFinalization() this->m_completed = true; #endif - ExecutableAllocator::makeExecutable(code(), m_initialSize); MacroAssembler::cacheFlush(code(), m_size); + ExecutableAllocator::makeExecutable(code(), m_initialSize); } template <typename MacroAssembler> @@ -518,6 +520,18 @@ public: #endif +#if CPU(ARM_THUMB2) +typedef LinkBuffer<MacroAssembler<MacroAssemblerARMv7>> DefaultLinkBuffer; +#elif CPU(ARM64) +typedef LinkBuffer<MacroAssembler<MacroAssemblerARM64>> DefaultLinkBuffer; +#elif CPU(MIPS) +typedef LinkBuffer<MacroAssembler<MacroAssemblerMIPS>> DefaultLinkBuffer; +#elif CPU(X86) +typedef LinkBuffer<MacroAssembler<MacroAssemblerX86>> DefaultLinkBuffer; +#elif CPU(X86_64) +typedef LinkBuffer<MacroAssembler<MacroAssemblerX86_64>> DefaultLinkBuffer; +#endif + } // namespace JSC #endif // ENABLE(ASSEMBLER) diff --git a/src/3rdparty/masm/assembler/MacroAssembler.h b/src/3rdparty/masm/assembler/MacroAssembler.h index 20ddcadae1..b442a81bd0 100644 --- a/src/3rdparty/masm/assembler/MacroAssembler.h +++ b/src/3rdparty/masm/assembler/MacroAssembler.h @@ -39,10 +39,6 @@ namespace JSC { typedef MacroAssemblerARMv7 MacroAssemblerBase; }; #elif CPU(ARM64) namespace JSC { typedef MacroAssemblerARM64 MacroAssemblerBase; }; -#elif CPU(ARM_TRADITIONAL) -#include 
"MacroAssemblerARM.h" -namespace JSC { typedef MacroAssemblerARM MacroAssemblerBase; }; - #elif CPU(MIPS) #include "MacroAssemblerMIPS.h" namespace JSC { @@ -57,12 +53,6 @@ namespace JSC { typedef MacroAssemblerX86 MacroAssemblerBase; }; #include "MacroAssemblerX86_64.h" namespace JSC { typedef MacroAssemblerX86_64 MacroAssemblerBase; }; -#elif CPU(SH4) -#include "MacroAssemblerSH4.h" -namespace JSC { -typedef MacroAssemblerSH4 MacroAssemblerBase; -}; - #else #error "The MacroAssembler is not supported on this platform." #endif @@ -805,7 +795,7 @@ public: Jump branchPtr(RelationalCondition cond, RegisterID left, RegisterID right) { - return branch64(cond, left, right); + return this->branch64(cond, left, right); } Jump branchPtr(RelationalCondition cond, RegisterID left, TrustedImmPtr right) @@ -840,7 +830,7 @@ public: Jump branchTestPtr(ResultCondition cond, RegisterID reg, TrustedImm32 mask = TrustedImm32(-1)) { - return branchTest64(cond, reg, mask); + return this->branchTest64(cond, reg, mask); } Jump branchTestPtr(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1)) @@ -1489,16 +1479,12 @@ public: typedef MacroAssembler<MacroAssemblerARMv7> DefaultMacroAssembler; #elif CPU(ARM64) typedef MacroAssembler<MacroAssemblerARM64> DefaultMacroAssembler; -#elif CPU(ARM_TRADITIONAL) -typedef MacroAssembler<MacroAssemblerARM> DefaultMacroAssembler; #elif CPU(MIPS) typedef MacroAssembler<MacroAssemblerMIPS> DefaultMacroAssembler; #elif CPU(X86) typedef MacroAssembler<MacroAssemblerX86> DefaultMacroAssembler; #elif CPU(X86_64) typedef MacroAssembler<MacroAssemblerX86_64> DefaultMacroAssembler; -#elif CPU(SH4) -typedef JSC::MacroAssemblerSH4 DefaultMacroAssembler; #endif } // namespace JSC diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARM.cpp b/src/3rdparty/masm/assembler/MacroAssemblerARM.cpp deleted file mode 100644 index 3ca9c7da80..0000000000 --- a/src/3rdparty/masm/assembler/MacroAssemblerARM.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * 
Copyright (C) 2009 University of Szeged - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#if ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#include "MacroAssemblerARM.h" - -#if OS(LINUX) -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <elf.h> -#define HWCAP_VFP (1 << 6) -#endif - -namespace JSC { - -static bool isVFPPresent() -{ -#if OS(LINUX) - int fd = open("/proc/self/auxv", O_RDONLY); - if (fd > 0) { - Elf32_auxv_t aux; - while (read(fd, &aux, sizeof(Elf32_auxv_t))) { - if (aux.a_type == AT_HWCAP) { - close(fd); - return aux.a_un.a_val & HWCAP_VFP; - } - } - close(fd); - } -#endif - -#if (COMPILER(RVCT) && defined(__TARGET_FPU_VFP)) || (COMPILER(GCC) && defined(__VFP_FP__)) - return true; -#else - return false; -#endif -} - -const bool MacroAssemblerARM::s_isVFPPresent = isVFPPresent(); - -#if CPU(ARMV5_OR_LOWER) -/* On ARMv5 and below, natural alignment is required. */ -void MacroAssemblerARM::load32WithUnalignedHalfWords(BaseIndex address, RegisterID dest) -{ - ARMWord op2; - - ASSERT(address.scale >= 0 && address.scale <= 3); - op2 = m_assembler.lsl(address.index, static_cast<int>(address.scale)); - - if (address.offset >= 0 && address.offset + 0x2 <= 0xff) { - m_assembler.add(ARMRegisters::S0, address.base, op2); - m_assembler.halfDtrUp(ARMAssembler::LoadUint16, dest, ARMRegisters::S0, ARMAssembler::getOp2Half(address.offset)); - m_assembler.halfDtrUp(ARMAssembler::LoadUint16, ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Half(address.offset + 0x2)); - } else if (address.offset < 0 && address.offset >= -0xff) { - m_assembler.add(ARMRegisters::S0, address.base, op2); - m_assembler.halfDtrDown(ARMAssembler::LoadUint16, dest, ARMRegisters::S0, ARMAssembler::getOp2Half(-address.offset)); - m_assembler.halfDtrDown(ARMAssembler::LoadUint16, ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Half(-address.offset - 0x2)); - } else { - m_assembler.moveImm(address.offset, ARMRegisters::S0); - m_assembler.add(ARMRegisters::S0, ARMRegisters::S0, op2); - 
m_assembler.halfDtrUpRegister(ARMAssembler::LoadUint16, dest, address.base, ARMRegisters::S0); - m_assembler.add(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::Op2Immediate | 0x2); - m_assembler.halfDtrUpRegister(ARMAssembler::LoadUint16, ARMRegisters::S0, address.base, ARMRegisters::S0); - } - m_assembler.orr(dest, dest, m_assembler.lsl(ARMRegisters::S0, 16)); -} -#endif - -} - -#endif // ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARM.h b/src/3rdparty/masm/assembler/MacroAssemblerARM.h deleted file mode 100644 index 268fe5fe73..0000000000 --- a/src/3rdparty/masm/assembler/MacroAssemblerARM.h +++ /dev/null @@ -1,1386 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. - * Copyright (C) 2009, 2010 University of Szeged - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MacroAssemblerARM_h -#define MacroAssemblerARM_h - -#if ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#include "ARMAssembler.h" -#include "AbstractMacroAssembler.h" - -namespace JSC { - -class MacroAssemblerARM : public AbstractMacroAssembler<ARMAssembler> { - static const int DoubleConditionMask = 0x0f; - static const int DoubleConditionBitSpecial = 0x10; - COMPILE_ASSERT(!(DoubleConditionBitSpecial & DoubleConditionMask), DoubleConditionBitSpecial_should_not_interfere_with_ARMAssembler_Condition_codes); -public: - typedef ARMRegisters::FPRegisterID FPRegisterID; - - enum RelationalCondition { - Equal = ARMAssembler::EQ, - NotEqual = ARMAssembler::NE, - Above = ARMAssembler::HI, - AboveOrEqual = ARMAssembler::CS, - Below = ARMAssembler::CC, - BelowOrEqual = ARMAssembler::LS, - GreaterThan = ARMAssembler::GT, - GreaterThanOrEqual = ARMAssembler::GE, - LessThan = ARMAssembler::LT, - LessThanOrEqual = ARMAssembler::LE - }; - - enum ResultCondition { - Overflow = ARMAssembler::VS, - Signed = ARMAssembler::MI, - Zero = ARMAssembler::EQ, - NonZero = ARMAssembler::NE - }; - - enum DoubleCondition { - // These conditions will only evaluate to true if the comparison is ordered - i.e. neither operand is NaN. 
- DoubleEqual = ARMAssembler::EQ, - DoubleNotEqual = ARMAssembler::NE | DoubleConditionBitSpecial, - DoubleGreaterThan = ARMAssembler::GT, - DoubleGreaterThanOrEqual = ARMAssembler::GE, - DoubleLessThan = ARMAssembler::CC, - DoubleLessThanOrEqual = ARMAssembler::LS, - // If either operand is NaN, these conditions always evaluate to true. - DoubleEqualOrUnordered = ARMAssembler::EQ | DoubleConditionBitSpecial, - DoubleNotEqualOrUnordered = ARMAssembler::NE, - DoubleGreaterThanOrUnordered = ARMAssembler::HI, - DoubleGreaterThanOrEqualOrUnordered = ARMAssembler::CS, - DoubleLessThanOrUnordered = ARMAssembler::LT, - DoubleLessThanOrEqualOrUnordered = ARMAssembler::LE, - }; - - static const RegisterID stackPointerRegister = ARMRegisters::sp; - static const RegisterID linkRegister = ARMRegisters::lr; - - static const Scale ScalePtr = TimesFour; - - void add32(RegisterID src, RegisterID dest) - { - m_assembler.adds(dest, dest, src); - } - - void add32(RegisterID op1, RegisterID op2, RegisterID dest) - { - m_assembler.adds(dest, op1, op2); - } - - void add32(TrustedImm32 imm, Address address) - { - load32(address, ARMRegisters::S1); - add32(imm, ARMRegisters::S1); - store32(ARMRegisters::S1, address); - } - - void add32(TrustedImm32 imm, RegisterID dest) - { - m_assembler.adds(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void add32(AbsoluteAddress src, RegisterID dest) - { - move(TrustedImmPtr(src.m_ptr), ARMRegisters::S1); - m_assembler.dtrUp(ARMAssembler::LoadUint32, ARMRegisters::S1, ARMRegisters::S1, 0); - add32(ARMRegisters::S1, dest); - } - - void add32(Address src, RegisterID dest) - { - load32(src, ARMRegisters::S1); - add32(ARMRegisters::S1, dest); - } - - void add32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - m_assembler.adds(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void and32(RegisterID src, RegisterID dest) - { - m_assembler.bitAnds(dest, dest, src); - } - - void and32(RegisterID op1, 
RegisterID op2, RegisterID dest) - { - m_assembler.bitAnds(dest, op1, op2); - } - - void and32(TrustedImm32 imm, RegisterID dest) - { - ARMWord w = m_assembler.getImm(imm.m_value, ARMRegisters::S0, true); - if (w & ARMAssembler::Op2InvertedImmediate) - m_assembler.bics(dest, dest, w & ~ARMAssembler::Op2InvertedImmediate); - else - m_assembler.bitAnds(dest, dest, w); - } - - void and32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - ARMWord w = m_assembler.getImm(imm.m_value, ARMRegisters::S0, true); - if (w & ARMAssembler::Op2InvertedImmediate) - m_assembler.bics(dest, src, w & ~ARMAssembler::Op2InvertedImmediate); - else - m_assembler.bitAnds(dest, src, w); - } - - void and32(Address src, RegisterID dest) - { - load32(src, ARMRegisters::S1); - and32(ARMRegisters::S1, dest); - } - - void lshift32(RegisterID shiftAmount, RegisterID dest) - { - lshift32(dest, shiftAmount, dest); - } - - void lshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest) - { - ARMWord w = ARMAssembler::getOp2Byte(0x1f); - m_assembler.bitAnd(ARMRegisters::S0, shiftAmount, w); - - m_assembler.movs(dest, m_assembler.lslRegister(src, ARMRegisters::S0)); - } - - void lshift32(TrustedImm32 imm, RegisterID dest) - { - m_assembler.movs(dest, m_assembler.lsl(dest, imm.m_value & 0x1f)); - } - - void lshift32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - m_assembler.movs(dest, m_assembler.lsl(src, imm.m_value & 0x1f)); - } - - void mul32(RegisterID op1, RegisterID op2, RegisterID dest) - { - if (op2 == dest) { - if (op1 == dest) { - move(op2, ARMRegisters::S0); - op2 = ARMRegisters::S0; - } else { - // Swap the operands. 
- RegisterID tmp = op1; - op1 = op2; - op2 = tmp; - } - } - m_assembler.muls(dest, op1, op2); - } - - void mul32(RegisterID src, RegisterID dest) - { - mul32(src, dest, dest); - } - - void mul32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - move(imm, ARMRegisters::S0); - m_assembler.muls(dest, src, ARMRegisters::S0); - } - - void neg32(RegisterID srcDest) - { - m_assembler.rsbs(srcDest, srcDest, ARMAssembler::getOp2Byte(0)); - } - - void or32(RegisterID src, RegisterID dest) - { - m_assembler.orrs(dest, dest, src); - } - - void or32(RegisterID src, AbsoluteAddress dest) - { - move(TrustedImmPtr(dest.m_ptr), ARMRegisters::S0); - load32(Address(ARMRegisters::S0), ARMRegisters::S1); - or32(src, ARMRegisters::S1); - store32(ARMRegisters::S1, ARMRegisters::S0); - } - - void or32(TrustedImm32 imm, RegisterID dest) - { - m_assembler.orrs(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void or32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - m_assembler.orrs(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void or32(RegisterID op1, RegisterID op2, RegisterID dest) - { - m_assembler.orrs(dest, op1, op2); - } - - void rshift32(RegisterID shiftAmount, RegisterID dest) - { - rshift32(dest, shiftAmount, dest); - } - - void rshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest) - { - ARMWord w = ARMAssembler::getOp2Byte(0x1f); - m_assembler.bitAnd(ARMRegisters::S0, shiftAmount, w); - - m_assembler.movs(dest, m_assembler.asrRegister(src, ARMRegisters::S0)); - } - - void rshift32(TrustedImm32 imm, RegisterID dest) - { - rshift32(dest, imm, dest); - } - - void rshift32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - m_assembler.movs(dest, m_assembler.asr(src, imm.m_value & 0x1f)); - } - - void urshift32(RegisterID shiftAmount, RegisterID dest) - { - urshift32(dest, shiftAmount, dest); - } - - void urshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest) - { - ARMWord w = 
ARMAssembler::getOp2Byte(0x1f); - m_assembler.bitAnd(ARMRegisters::S0, shiftAmount, w); - - m_assembler.movs(dest, m_assembler.lsrRegister(src, ARMRegisters::S0)); - } - - void urshift32(TrustedImm32 imm, RegisterID dest) - { - m_assembler.movs(dest, m_assembler.lsr(dest, imm.m_value & 0x1f)); - } - - void urshift32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - m_assembler.movs(dest, m_assembler.lsr(src, imm.m_value & 0x1f)); - } - - void sub32(RegisterID src, RegisterID dest) - { - m_assembler.subs(dest, dest, src); - } - - void sub32(TrustedImm32 imm, RegisterID dest) - { - m_assembler.subs(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void sub32(TrustedImm32 imm, Address address) - { - load32(address, ARMRegisters::S1); - sub32(imm, ARMRegisters::S1); - store32(ARMRegisters::S1, address); - } - - void sub32(Address src, RegisterID dest) - { - load32(src, ARMRegisters::S1); - sub32(ARMRegisters::S1, dest); - } - - void sub32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - m_assembler.subs(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void xor32(RegisterID src, RegisterID dest) - { - m_assembler.eors(dest, dest, src); - } - - void xor32(RegisterID op1, RegisterID op2, RegisterID dest) - { - m_assembler.eors(dest, op1, op2); - } - - void xor32(TrustedImm32 imm, RegisterID dest) - { - if (imm.m_value == -1) - m_assembler.mvns(dest, dest); - else - m_assembler.eors(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void xor32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - if (imm.m_value == -1) - m_assembler.mvns(dest, src); - else - m_assembler.eors(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void countLeadingZeros32(RegisterID src, RegisterID dest) - { -#if WTF_ARM_ARCH_AT_LEAST(5) - m_assembler.clz(dest, src); -#else - UNUSED_PARAM(src); - UNUSED_PARAM(dest); - RELEASE_ASSERT_NOT_REACHED(); -#endif - } - - void load8(ImplicitAddress 
address, RegisterID dest) - { - m_assembler.dataTransfer32(ARMAssembler::LoadUint8, dest, address.base, address.offset); - } - - void load8(BaseIndex address, RegisterID dest) - { - m_assembler.baseIndexTransfer32(ARMAssembler::LoadUint8, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void load8Signed(BaseIndex address, RegisterID dest) - { - m_assembler.baseIndexTransfer16(ARMAssembler::LoadInt8, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void load16(ImplicitAddress address, RegisterID dest) - { - m_assembler.dataTransfer16(ARMAssembler::LoadUint16, dest, address.base, address.offset); - } - - void load16(BaseIndex address, RegisterID dest) - { - m_assembler.baseIndexTransfer16(ARMAssembler::LoadUint16, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void load16Signed(BaseIndex address, RegisterID dest) - { - m_assembler.baseIndexTransfer16(ARMAssembler::LoadInt16, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void load32(ImplicitAddress address, RegisterID dest) - { - m_assembler.dataTransfer32(ARMAssembler::LoadUint32, dest, address.base, address.offset); - } - - void load32(BaseIndex address, RegisterID dest) - { - m_assembler.baseIndexTransfer32(ARMAssembler::LoadUint32, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - -#if CPU(ARMV5_OR_LOWER) - void load32WithUnalignedHalfWords(BaseIndex address, RegisterID dest); -#else - void load32WithUnalignedHalfWords(BaseIndex address, RegisterID dest) - { - load32(address, dest); - } -#endif - - void load16Unaligned(BaseIndex address, RegisterID dest) - { - load16(address, dest); - } - - ConvertibleLoadLabel convertibleLoadPtr(Address address, RegisterID dest) - { - ConvertibleLoadLabel result(this); - ASSERT(address.offset >= 0 && address.offset <= 255); - 
m_assembler.dtrUp(ARMAssembler::LoadUint32, dest, address.base, address.offset); - return result; - } - - DataLabel32 load32WithAddressOffsetPatch(Address address, RegisterID dest) - { - DataLabel32 dataLabel(this); - m_assembler.ldrUniqueImmediate(ARMRegisters::S0, 0); - m_assembler.dtrUpRegister(ARMAssembler::LoadUint32, dest, address.base, ARMRegisters::S0); - return dataLabel; - } - - static bool isCompactPtrAlignedAddressOffset(ptrdiff_t value) - { - return value >= -4095 && value <= 4095; - } - - DataLabelCompact load32WithCompactAddressOffsetPatch(Address address, RegisterID dest) - { - DataLabelCompact dataLabel(this); - ASSERT(isCompactPtrAlignedAddressOffset(address.offset)); - if (address.offset >= 0) - m_assembler.dtrUp(ARMAssembler::LoadUint32, dest, address.base, address.offset); - else - m_assembler.dtrDown(ARMAssembler::LoadUint32, dest, address.base, address.offset); - return dataLabel; - } - - DataLabel32 store32WithAddressOffsetPatch(RegisterID src, Address address) - { - DataLabel32 dataLabel(this); - m_assembler.ldrUniqueImmediate(ARMRegisters::S0, 0); - m_assembler.dtrUpRegister(ARMAssembler::StoreUint32, src, address.base, ARMRegisters::S0); - return dataLabel; - } - - void store8(RegisterID src, BaseIndex address) - { - m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint8, src, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void store8(TrustedImm32 imm, const void* address) - { - move(TrustedImm32(reinterpret_cast<ARMWord>(address)), ARMRegisters::S0); - move(imm, ARMRegisters::S1); - m_assembler.dtrUp(ARMAssembler::StoreUint8, ARMRegisters::S1, ARMRegisters::S0, 0); - } - - void store16(RegisterID src, BaseIndex address) - { - m_assembler.baseIndexTransfer16(ARMAssembler::StoreUint16, src, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void store32(RegisterID src, ImplicitAddress address) - { - m_assembler.dataTransfer32(ARMAssembler::StoreUint32, src, 
address.base, address.offset); - } - - void store32(RegisterID src, BaseIndex address) - { - m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint32, src, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void store32(TrustedImm32 imm, ImplicitAddress address) - { - move(imm, ARMRegisters::S1); - store32(ARMRegisters::S1, address); - } - - void store32(TrustedImm32 imm, BaseIndex address) - { - move(imm, ARMRegisters::S1); - m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint32, ARMRegisters::S1, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void store32(RegisterID src, const void* address) - { - m_assembler.ldrUniqueImmediate(ARMRegisters::S0, reinterpret_cast<ARMWord>(address)); - m_assembler.dtrUp(ARMAssembler::StoreUint32, src, ARMRegisters::S0, 0); - } - - void store32(TrustedImm32 imm, const void* address) - { - m_assembler.ldrUniqueImmediate(ARMRegisters::S0, reinterpret_cast<ARMWord>(address)); - m_assembler.moveImm(imm.m_value, ARMRegisters::S1); - m_assembler.dtrUp(ARMAssembler::StoreUint32, ARMRegisters::S1, ARMRegisters::S0, 0); - } - - void pop(RegisterID dest) - { - m_assembler.pop(dest); - } - - void push(RegisterID src) - { - m_assembler.push(src); - } - - void push(Address address) - { - load32(address, ARMRegisters::S1); - push(ARMRegisters::S1); - } - - void push(TrustedImm32 imm) - { - move(imm, ARMRegisters::S0); - push(ARMRegisters::S0); - } - - void move(TrustedImm32 imm, RegisterID dest) - { - m_assembler.moveImm(imm.m_value, dest); - } - - void move(RegisterID src, RegisterID dest) - { - if (src != dest) - m_assembler.mov(dest, src); - } - - void move(TrustedImmPtr imm, RegisterID dest) - { - move(TrustedImm32(imm), dest); - } - - void swap(RegisterID reg1, RegisterID reg2) - { - move(reg1, ARMRegisters::S0); - move(reg2, reg1); - move(ARMRegisters::S0, reg2); - } - - void signExtend32ToPtr(RegisterID src, RegisterID dest) - { - if (src != dest) - move(src, 
dest); - } - - void zeroExtend32ToPtr(RegisterID src, RegisterID dest) - { - if (src != dest) - move(src, dest); - } - - Jump branch8(RelationalCondition cond, Address left, TrustedImm32 right) - { - load8(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - ASSERT(!(right.m_value & 0xFFFFFF00)); - load8(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch32(RelationalCondition cond, RegisterID left, RegisterID right, int useConstantPool = 0) - { - m_assembler.cmp(left, right); - return Jump(m_assembler.jmp(ARMCondition(cond), useConstantPool)); - } - - Jump branch32(RelationalCondition cond, RegisterID left, TrustedImm32 right, int useConstantPool = 0) - { - internalCompare32(left, right); - return Jump(m_assembler.jmp(ARMCondition(cond), useConstantPool)); - } - - Jump branch32(RelationalCondition cond, RegisterID left, Address right) - { - load32(right, ARMRegisters::S1); - return branch32(cond, left, ARMRegisters::S1); - } - - Jump branch32(RelationalCondition cond, Address left, RegisterID right) - { - load32(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch32(RelationalCondition cond, Address left, TrustedImm32 right) - { - load32(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch32(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - load32(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch32WithUnalignedHalfWords(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - load32WithUnalignedHalfWords(left, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branchTest8(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1)) - { - load8(address, ARMRegisters::S1); - return branchTest32(cond, 
ARMRegisters::S1, mask); - } - - Jump branchTest8(ResultCondition cond, AbsoluteAddress address, TrustedImm32 mask = TrustedImm32(-1)) - { - move(TrustedImmPtr(address.m_ptr), ARMRegisters::S1); - load8(Address(ARMRegisters::S1), ARMRegisters::S1); - return branchTest32(cond, ARMRegisters::S1, mask); - } - - Jump branchTest32(ResultCondition cond, RegisterID reg, RegisterID mask) - { - ASSERT((cond == Zero) || (cond == NonZero)); - m_assembler.tst(reg, mask); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchTest32(ResultCondition cond, RegisterID reg, TrustedImm32 mask = TrustedImm32(-1)) - { - ASSERT((cond == Zero) || (cond == NonZero)); - ARMWord w = m_assembler.getImm(mask.m_value, ARMRegisters::S0, true); - if (w & ARMAssembler::Op2InvertedImmediate) - m_assembler.bics(ARMRegisters::S0, reg, w & ~ARMAssembler::Op2InvertedImmediate); - else - m_assembler.tst(reg, w); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchTest32(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1)) - { - load32(address, ARMRegisters::S1); - return branchTest32(cond, ARMRegisters::S1, mask); - } - - Jump branchTest32(ResultCondition cond, BaseIndex address, TrustedImm32 mask = TrustedImm32(-1)) - { - load32(address, ARMRegisters::S1); - return branchTest32(cond, ARMRegisters::S1, mask); - } - - Jump jump() - { - return Jump(m_assembler.jmp()); - } - - void jump(RegisterID target) - { - m_assembler.bx(target); - } - - void jump(Address address) - { - load32(address, ARMRegisters::pc); - } - - void jump(AbsoluteAddress address) - { - move(TrustedImmPtr(address.m_ptr), ARMRegisters::S0); - load32(Address(ARMRegisters::S0, 0), ARMRegisters::pc); - } - - void moveDoubleToInts(FPRegisterID src, RegisterID dest1, RegisterID dest2) - { - m_assembler.vmov(dest1, dest2, src); - } - - void moveIntsToDouble(RegisterID src1, RegisterID src2, FPRegisterID dest, FPRegisterID) - { - m_assembler.vmov(dest, src1, src2); - } - - Jump 
branchAdd32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - add32(src, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchAdd32(ResultCondition cond, RegisterID op1, RegisterID op2, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - add32(op1, op2, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - add32(imm, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchAdd32(ResultCondition cond, RegisterID src, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - add32(src, imm, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, AbsoluteAddress dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - add32(imm, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - void mull32(RegisterID op1, RegisterID op2, RegisterID dest) - { - if (op2 == dest) { - if (op1 == dest) { - move(op2, ARMRegisters::S0); - op2 = ARMRegisters::S0; - } else { - // Swap the operands. 
- RegisterID tmp = op1; - op1 = op2; - op2 = tmp; - } - } - m_assembler.mull(ARMRegisters::S1, dest, op1, op2); - m_assembler.cmp(ARMRegisters::S1, m_assembler.asr(dest, 31)); - } - - Jump branchMul32(ResultCondition cond, RegisterID src1, RegisterID src2, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - if (cond == Overflow) { - mull32(src1, src2, dest); - cond = NonZero; - } - else - mul32(src1, src2, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchMul32(ResultCondition cond, RegisterID src, RegisterID dest) - { - return branchMul32(cond, src, dest, dest); - } - - Jump branchMul32(ResultCondition cond, TrustedImm32 imm, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - if (cond == Overflow) { - move(imm, ARMRegisters::S0); - mull32(ARMRegisters::S0, src, dest); - cond = NonZero; - } - else - mul32(imm, src, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchSub32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - sub32(src, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchSub32(ResultCondition cond, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - sub32(imm, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchSub32(ResultCondition cond, RegisterID src, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - sub32(src, imm, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchSub32(ResultCondition cond, RegisterID op1, RegisterID op2, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); 
- m_assembler.subs(dest, op1, op2); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchNeg32(ResultCondition cond, RegisterID srcDest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - neg32(srcDest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - Jump branchOr32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Signed) || (cond == Zero) || (cond == NonZero)); - or32(src, dest); - return Jump(m_assembler.jmp(ARMCondition(cond))); - } - - PatchableJump patchableBranch32(RelationalCondition cond, RegisterID reg, TrustedImm32 imm) - { - internalCompare32(reg, imm); - Jump jump(m_assembler.loadBranchTarget(ARMRegisters::S1, ARMCondition(cond), true)); - m_assembler.bx(ARMRegisters::S1, ARMCondition(cond)); - return PatchableJump(jump); - } - - void breakpoint() - { - m_assembler.bkpt(0); - } - - Call nearCall() - { - m_assembler.loadBranchTarget(ARMRegisters::S1, ARMAssembler::AL, true); - return Call(m_assembler.blx(ARMRegisters::S1), Call::LinkableNear); - } - - Call call(RegisterID target) - { - return Call(m_assembler.blx(target), Call::None); - } - - void call(Address address) - { - call32(address.base, address.offset); - } - - void ret() - { - m_assembler.bx(linkRegister); - } - - void compare32(RelationalCondition cond, RegisterID left, RegisterID right, RegisterID dest) - { - m_assembler.cmp(left, right); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(0)); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond)); - } - - void compare32(RelationalCondition cond, RegisterID left, TrustedImm32 right, RegisterID dest) - { - m_assembler.cmp(left, m_assembler.getImm(right.m_value, ARMRegisters::S0)); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(0)); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond)); - } - - void compare8(RelationalCondition cond, Address left, TrustedImm32 right, RegisterID dest) - { - load8(left, 
ARMRegisters::S1); - compare32(cond, ARMRegisters::S1, right, dest); - } - - void test32(ResultCondition cond, RegisterID reg, TrustedImm32 mask, RegisterID dest) - { - if (mask.m_value == -1) - m_assembler.cmp(0, reg); - else - m_assembler.tst(reg, m_assembler.getImm(mask.m_value, ARMRegisters::S0)); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(0)); - m_assembler.mov(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond)); - } - - void test32(ResultCondition cond, Address address, TrustedImm32 mask, RegisterID dest) - { - load32(address, ARMRegisters::S1); - test32(cond, ARMRegisters::S1, mask, dest); - } - - void test8(ResultCondition cond, Address address, TrustedImm32 mask, RegisterID dest) - { - load8(address, ARMRegisters::S1); - test32(cond, ARMRegisters::S1, mask, dest); - } - - void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - m_assembler.add(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); - } - - void add32(TrustedImm32 imm, AbsoluteAddress address) - { - load32(address.m_ptr, ARMRegisters::S1); - add32(imm, ARMRegisters::S1); - store32(ARMRegisters::S1, address.m_ptr); - } - - void add64(TrustedImm32 imm, AbsoluteAddress address) - { - ARMWord tmp; - - move(TrustedImmPtr(address.m_ptr), ARMRegisters::S1); - m_assembler.dtrUp(ARMAssembler::LoadUint32, ARMRegisters::S0, ARMRegisters::S1, 0); - - if ((tmp = ARMAssembler::getOp2(imm.m_value)) != ARMAssembler::InvalidImmediate) - m_assembler.adds(ARMRegisters::S0, ARMRegisters::S0, tmp); - else if ((tmp = ARMAssembler::getOp2(-imm.m_value)) != ARMAssembler::InvalidImmediate) - m_assembler.subs(ARMRegisters::S0, ARMRegisters::S0, tmp); - else { - m_assembler.adds(ARMRegisters::S0, ARMRegisters::S0, m_assembler.getImm(imm.m_value, ARMRegisters::S1)); - move(TrustedImmPtr(address.m_ptr), ARMRegisters::S1); - } - m_assembler.dtrUp(ARMAssembler::StoreUint32, ARMRegisters::S0, ARMRegisters::S1, 0); - - m_assembler.dtrUp(ARMAssembler::LoadUint32, ARMRegisters::S0, 
ARMRegisters::S1, sizeof(ARMWord)); - if (imm.m_value >= 0) - m_assembler.adc(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); - else - m_assembler.sbc(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); - m_assembler.dtrUp(ARMAssembler::StoreUint32, ARMRegisters::S0, ARMRegisters::S1, sizeof(ARMWord)); - } - - void sub32(TrustedImm32 imm, AbsoluteAddress address) - { - load32(address.m_ptr, ARMRegisters::S1); - sub32(imm, ARMRegisters::S1); - store32(ARMRegisters::S1, address.m_ptr); - } - - void load32(const void* address, RegisterID dest) - { - m_assembler.ldrUniqueImmediate(ARMRegisters::S0, reinterpret_cast<ARMWord>(address)); - m_assembler.dtrUp(ARMAssembler::LoadUint32, dest, ARMRegisters::S0, 0); - } - - Jump branch32(RelationalCondition cond, AbsoluteAddress left, RegisterID right) - { - load32(left.m_ptr, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - Jump branch32(RelationalCondition cond, AbsoluteAddress left, TrustedImm32 right) - { - load32(left.m_ptr, ARMRegisters::S1); - return branch32(cond, ARMRegisters::S1, right); - } - - void relativeTableJump(RegisterID index, int scale) - { - ASSERT(scale >= 0 && scale <= 31); - m_assembler.add(ARMRegisters::pc, ARMRegisters::pc, m_assembler.lsl(index, scale)); - - // NOP the default prefetching - m_assembler.mov(ARMRegisters::r0, ARMRegisters::r0); - } - - Call call() - { - ensureSpace(2 * sizeof(ARMWord), sizeof(ARMWord)); - m_assembler.loadBranchTarget(ARMRegisters::S1, ARMAssembler::AL, true); - return Call(m_assembler.blx(ARMRegisters::S1), Call::Linkable); - } - - Call tailRecursiveCall() - { - return Call::fromTailJump(jump()); - } - - Call makeTailRecursiveCall(Jump oldJump) - { - return Call::fromTailJump(oldJump); - } - - DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) - { - DataLabelPtr dataLabel(this); - m_assembler.ldrUniqueImmediate(dest, reinterpret_cast<ARMWord>(initialValue.m_value)); - return dataLabel; - } 
- - Jump branchPtrWithPatch(RelationalCondition cond, RegisterID left, DataLabelPtr& dataLabel, TrustedImmPtr initialRightValue = TrustedImmPtr(0)) - { - ensureSpace(3 * sizeof(ARMWord), 2 * sizeof(ARMWord)); - dataLabel = moveWithPatch(initialRightValue, ARMRegisters::S1); - Jump jump = branch32(cond, left, ARMRegisters::S1, true); - return jump; - } - - Jump branchPtrWithPatch(RelationalCondition cond, Address left, DataLabelPtr& dataLabel, TrustedImmPtr initialRightValue = TrustedImmPtr(0)) - { - load32(left, ARMRegisters::S1); - ensureSpace(3 * sizeof(ARMWord), 2 * sizeof(ARMWord)); - dataLabel = moveWithPatch(initialRightValue, ARMRegisters::S0); - Jump jump = branch32(cond, ARMRegisters::S0, ARMRegisters::S1, true); - return jump; - } - - DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) - { - DataLabelPtr dataLabel = moveWithPatch(initialValue, ARMRegisters::S1); - store32(ARMRegisters::S1, address); - return dataLabel; - } - - DataLabelPtr storePtrWithPatch(ImplicitAddress address) - { - return storePtrWithPatch(TrustedImmPtr(0), address); - } - - // Floating point operators - static bool supportsFloatingPoint() - { - return s_isVFPPresent; - } - - static bool supportsFloatingPointTruncate() - { - return false; - } - - static bool supportsFloatingPointSqrt() - { - return s_isVFPPresent; - } - static bool supportsFloatingPointAbs() { return false; } - - void loadFloat(BaseIndex address, FPRegisterID dest) - { - m_assembler.baseIndexTransferFloat(ARMAssembler::LoadFloat, dest, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void loadDouble(ImplicitAddress address, FPRegisterID dest) - { - m_assembler.dataTransferFloat(ARMAssembler::LoadDouble, dest, address.base, address.offset); - } - - void loadDouble(BaseIndex address, FPRegisterID dest) - { - m_assembler.baseIndexTransferFloat(ARMAssembler::LoadDouble, dest, address.base, address.index, static_cast<int>(address.scale), 
address.offset); - } - - void loadDouble(const void* address, FPRegisterID dest) - { - move(TrustedImm32(reinterpret_cast<ARMWord>(address)), ARMRegisters::S0); - m_assembler.doubleDtrUp(ARMAssembler::LoadDouble, dest, ARMRegisters::S0, 0); - } - - void storeFloat(FPRegisterID src, BaseIndex address) - { - m_assembler.baseIndexTransferFloat(ARMAssembler::StoreFloat, src, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void storeDouble(FPRegisterID src, ImplicitAddress address) - { - m_assembler.dataTransferFloat(ARMAssembler::StoreDouble, src, address.base, address.offset); - } - - void storeDouble(FPRegisterID src, BaseIndex address) - { - m_assembler.baseIndexTransferFloat(ARMAssembler::StoreDouble, src, address.base, address.index, static_cast<int>(address.scale), address.offset); - } - - void storeDouble(FPRegisterID src, const void* address) - { - move(TrustedImm32(reinterpret_cast<ARMWord>(address)), ARMRegisters::S0); - m_assembler.dataTransferFloat(ARMAssembler::StoreDouble, src, ARMRegisters::S0, 0); - } - - void moveDouble(FPRegisterID src, FPRegisterID dest) - { - if (src != dest) - m_assembler.vmov_f64(dest, src); - } - - void addDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vadd_f64(dest, dest, src); - } - - void addDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) - { - m_assembler.vadd_f64(dest, op1, op2); - } - - void addDouble(Address src, FPRegisterID dest) - { - loadDouble(src, ARMRegisters::SD0); - addDouble(ARMRegisters::SD0, dest); - } - - void addDouble(AbsoluteAddress address, FPRegisterID dest) - { - loadDouble(address.m_ptr, ARMRegisters::SD0); - addDouble(ARMRegisters::SD0, dest); - } - - void divDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vdiv_f64(dest, dest, src); - } - - void divDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) - { - m_assembler.vdiv_f64(dest, op1, op2); - } - - void divDouble(Address src, FPRegisterID dest) - { - 
RELEASE_ASSERT_NOT_REACHED(); // Untested - loadDouble(src, ARMRegisters::SD0); - divDouble(ARMRegisters::SD0, dest); - } - - void subDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vsub_f64(dest, dest, src); - } - - void subDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) - { - m_assembler.vsub_f64(dest, op1, op2); - } - - void subDouble(Address src, FPRegisterID dest) - { - loadDouble(src, ARMRegisters::SD0); - subDouble(ARMRegisters::SD0, dest); - } - - void mulDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vmul_f64(dest, dest, src); - } - - void mulDouble(Address src, FPRegisterID dest) - { - loadDouble(src, ARMRegisters::SD0); - mulDouble(ARMRegisters::SD0, dest); - } - - void mulDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) - { - m_assembler.vmul_f64(dest, op1, op2); - } - - void sqrtDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vsqrt_f64(dest, src); - } - - void absDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vabs_f64(dest, src); - } - - void negateDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.vneg_f64(dest, src); - } - - void convertInt32ToDouble(RegisterID src, FPRegisterID dest) - { - m_assembler.vmov_vfp32(dest << 1, src); - m_assembler.vcvt_f64_s32(dest, dest << 1); - } - - void convertInt32ToDouble(Address src, FPRegisterID dest) - { - load32(src, ARMRegisters::S1); - convertInt32ToDouble(ARMRegisters::S1, dest); - } - - void convertInt32ToDouble(AbsoluteAddress src, FPRegisterID dest) - { - move(TrustedImmPtr(src.m_ptr), ARMRegisters::S1); - load32(Address(ARMRegisters::S1), ARMRegisters::S1); - convertInt32ToDouble(ARMRegisters::S1, dest); - } - - void convertFloatToDouble(FPRegisterID src, FPRegisterID dst) - { - m_assembler.vcvt_f64_f32(dst, src); - } - - void convertDoubleToFloat(FPRegisterID src, FPRegisterID dst) - { - m_assembler.vcvt_f32_f64(dst, src); - } - - Jump branchDouble(DoubleCondition cond, FPRegisterID left, FPRegisterID right) 
- { - m_assembler.vcmp_f64(left, right); - m_assembler.vmrs_apsr(); - if (cond & DoubleConditionBitSpecial) - m_assembler.cmp(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::VS); - return Jump(m_assembler.jmp(static_cast<ARMAssembler::Condition>(cond & ~DoubleConditionMask))); - } - - // Truncates 'src' to an integer, and places the resulting 'dest'. - // If the result is not representable as a 32 bit value, branch. - // May also branch for some values that are representable in 32 bits - // (specifically, in this case, INT_MIN). - enum BranchTruncateType { BranchIfTruncateFailed, BranchIfTruncateSuccessful }; - Jump branchTruncateDoubleToInt32(FPRegisterID src, RegisterID dest, BranchTruncateType branchType = BranchIfTruncateFailed) - { - truncateDoubleToInt32(src, dest); - - m_assembler.add(ARMRegisters::S0, dest, ARMAssembler::getOp2Byte(1)); - m_assembler.bic(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(1)); - - ARMWord w = ARMAssembler::getOp2(0x80000000); - ASSERT(w != ARMAssembler::InvalidImmediate); - m_assembler.cmp(ARMRegisters::S0, w); - return Jump(m_assembler.jmp(branchType == BranchIfTruncateFailed ? ARMAssembler::EQ : ARMAssembler::NE)); - } - - Jump branchTruncateDoubleToUint32(FPRegisterID src, RegisterID dest, BranchTruncateType branchType = BranchIfTruncateFailed) - { - truncateDoubleToUint32(src, dest); - - m_assembler.add(ARMRegisters::S0, dest, ARMAssembler::getOp2Byte(1)); - m_assembler.bic(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(1)); - - m_assembler.cmp(ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); - return Jump(m_assembler.jmp(branchType == BranchIfTruncateFailed ? ARMAssembler::EQ : ARMAssembler::NE)); - } - - // Result is undefined if the value is outside of the integer range. 
- void truncateDoubleToInt32(FPRegisterID src, RegisterID dest) - { - m_assembler.vcvt_s32_f64(ARMRegisters::SD0 << 1, src); - m_assembler.vmov_arm32(dest, ARMRegisters::SD0 << 1); - } - - void truncateDoubleToUint32(FPRegisterID src, RegisterID dest) - { - m_assembler.vcvt_u32_f64(ARMRegisters::SD0 << 1, src); - m_assembler.vmov_arm32(dest, ARMRegisters::SD0 << 1); - } - - // Convert 'src' to an integer, and places the resulting 'dest'. - // If the result is not representable as a 32 bit value, branch. - // May also branch for some values that are representable in 32 bits - // (specifically, in this case, 0). - void branchConvertDoubleToInt32(FPRegisterID src, RegisterID dest, JumpList& failureCases, FPRegisterID) - { - m_assembler.vcvt_s32_f64(ARMRegisters::SD0 << 1, src); - m_assembler.vmov_arm32(dest, ARMRegisters::SD0 << 1); - - // Convert the integer result back to float & compare to the original value - if not equal or unordered (NaN) then jump. - m_assembler.vcvt_f64_s32(ARMRegisters::SD0, ARMRegisters::SD0 << 1); - failureCases.append(branchDouble(DoubleNotEqualOrUnordered, src, ARMRegisters::SD0)); - - // If the result is zero, it might have been -0.0, and 0.0 equals to -0.0 - failureCases.append(branchTest32(Zero, dest)); - } - - Jump branchDoubleNonZero(FPRegisterID reg, FPRegisterID scratch) - { - m_assembler.mov(ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); - convertInt32ToDouble(ARMRegisters::S0, scratch); - return branchDouble(DoubleNotEqual, reg, scratch); - } - - Jump branchDoubleZeroOrNaN(FPRegisterID reg, FPRegisterID scratch) - { - m_assembler.mov(ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); - convertInt32ToDouble(ARMRegisters::S0, scratch); - return branchDouble(DoubleEqualOrUnordered, reg, scratch); - } - - // Invert a relational condition, e.g. == becomes !=, < becomes >=, etc. 
- static RelationalCondition invert(RelationalCondition cond) - { - ASSERT((static_cast<uint32_t>(cond & 0x0fffffff)) == 0 && static_cast<uint32_t>(cond) < static_cast<uint32_t>(ARMAssembler::AL)); - return static_cast<RelationalCondition>(cond ^ 0x10000000); - } - - void nop() - { - m_assembler.nop(); - } - - static FunctionPtr readCallTarget(CodeLocationCall call) - { - return FunctionPtr(reinterpret_cast<void(*)()>(ARMAssembler::readCallTarget(call.dataLocation()))); - } - - static void replaceWithJump(CodeLocationLabel instructionStart, CodeLocationLabel destination) - { - ARMAssembler::replaceWithJump(instructionStart.dataLocation(), destination.dataLocation()); - } - - static ptrdiff_t maxJumpReplacementSize() - { - ARMAssembler::maxJumpReplacementSize(); - return 0; - } - - static bool canJumpReplacePatchableBranchPtrWithPatch() { return false; } - - static CodeLocationLabel startOfPatchableBranchPtrWithPatchOnAddress(CodeLocationDataLabelPtr) - { - UNREACHABLE_FOR_PLATFORM(); - return CodeLocationLabel(); - } - - static CodeLocationLabel startOfBranchPtrWithPatchOnRegister(CodeLocationDataLabelPtr label) - { - return label.labelAtOffset(0); - } - - static void revertJumpReplacementToBranchPtrWithPatch(CodeLocationLabel instructionStart, RegisterID reg, void* initialValue) - { - ARMAssembler::revertBranchPtrWithPatch(instructionStart.dataLocation(), reg, reinterpret_cast<uintptr_t>(initialValue) & 0xffff); - } - - static void revertJumpReplacementToPatchableBranchPtrWithPatch(CodeLocationLabel, Address, void*) - { - UNREACHABLE_FOR_PLATFORM(); - } - -protected: - ARMAssembler::Condition ARMCondition(RelationalCondition cond) - { - return static_cast<ARMAssembler::Condition>(cond); - } - - ARMAssembler::Condition ARMCondition(ResultCondition cond) - { - return static_cast<ARMAssembler::Condition>(cond); - } - - void ensureSpace(int insnSpace, int constSpace) - { - m_assembler.ensureSpace(insnSpace, constSpace); - } - - int sizeOfConstantPool() - { - return 
m_assembler.sizeOfConstantPool(); - } - - void call32(RegisterID base, int32_t offset) - { - load32(Address(base, offset), ARMRegisters::S1); - m_assembler.blx(ARMRegisters::S1); - } - -private: - template <typename, template <typename> class> friend class LinkBufferBase; - friend class RepatchBuffer; - - void internalCompare32(RegisterID left, TrustedImm32 right) - { - ARMWord tmp = (static_cast<unsigned>(right.m_value) == 0x80000000) ? ARMAssembler::InvalidImmediate : m_assembler.getOp2(-right.m_value); - if (tmp != ARMAssembler::InvalidImmediate) - m_assembler.cmn(left, tmp); - else - m_assembler.cmp(left, m_assembler.getImm(right.m_value, ARMRegisters::S0)); - } - - static void linkCall(void* code, Call call, FunctionPtr function) - { - ARMAssembler::linkCall(code, call.m_label, function.value()); - } - - static void repatchCall(CodeLocationCall call, CodeLocationLabel destination) - { - ARMAssembler::relinkCall(call.dataLocation(), destination.executableAddress()); - } - - static void repatchCall(CodeLocationCall call, FunctionPtr destination) - { - ARMAssembler::relinkCall(call.dataLocation(), destination.executableAddress()); - } - - static const bool s_isVFPPresent; -}; - -} - -#endif // ENABLE(ASSEMBLER) && CPU(ARM_TRADITIONAL) - -#endif // MacroAssemblerARM_h diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARM64.h b/src/3rdparty/masm/assembler/MacroAssemblerARM64.h index ba0d7e93f8..e5a704292d 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerARM64.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerARM64.h @@ -1126,6 +1126,11 @@ public: m_assembler.ldrh(dest, address.base, memoryTempRegister); } + void load16Unaligned(ImplicitAddress address, RegisterID dest) + { + load16(address, dest); + } + void load16Unaligned(BaseIndex address, RegisterID dest) { load16(address, dest); @@ -1283,6 +1288,16 @@ public: return label; } + void storePair64(RegisterID src1, RegisterID src2, RegisterID dest) + { + storePair64(src1, src2, dest, TrustedImm32(0)); + 
} + + void storePair64(RegisterID src1, RegisterID src2, RegisterID dest, TrustedImm32 offset) + { + m_assembler.stp<64>(src1, src2, dest, offset.m_value); + } + void store32(RegisterID src, ImplicitAddress address) { if (tryStoreWithOffset<32>(src, address.base, address.offset)) @@ -1420,6 +1435,14 @@ public: store8(dataTempRegister, address); } + void getEffectiveAddress(BaseIndex address, RegisterID dest) + { + m_assembler.add<64>(dest, address.base, address.index, ARM64Assembler::LSL, address.scale); + if (address.offset) + add64(TrustedImm32(address.offset), dest); + } + + // Floating-point operations: static bool supportsFloatingPoint() { return true; } diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h b/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h index d91122d4a1..99801a0e3b 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h @@ -255,6 +255,14 @@ public: store32(dataTempRegister, address.m_ptr); } + void getEffectiveAddress(BaseIndex address, RegisterID dest) + { + m_assembler.lsl(addressTempRegister, address.index, static_cast<int>(address.scale)); + m_assembler.add(dest, address.base, addressTempRegister); + if (address.offset) + add32(TrustedImm32(address.offset), dest); + } + void add64(TrustedImm32 imm, AbsoluteAddress address) { move(TrustedImmPtr(address.m_ptr), addressTempRegister); @@ -680,6 +688,11 @@ public: load32(setupArmAddress(address), dest); } + void load16Unaligned(ImplicitAddress address, RegisterID dest) + { + load16(setupArmAddress(address), dest); + } + void load16Unaligned(BaseIndex address, RegisterID dest) { load16(setupArmAddress(address), dest); diff --git a/src/3rdparty/masm/assembler/MacroAssemblerCodeRef.h b/src/3rdparty/masm/assembler/MacroAssemblerCodeRef.h index e3c77d99e6..a7e78ad78f 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerCodeRef.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerCodeRef.h @@ -27,6 +27,7 @@ #define 
MacroAssemblerCodeRef_h #include "Disassembler.h" +#include <wtf/Platform.h> #include "ExecutableAllocator.h" #include "LLIntData.h" #include <wtf/DataLog.h> @@ -141,6 +142,8 @@ public: ASSERT_VALID_CODE_POINTER(m_value); } + inline FunctionPtr(MacroAssemblerCodePtr ptr); + // MSVC doesn't seem to treat functions with different calling conventions as // different types; these methods already defined for fastcall, below. #if CALLING_CONVENTION_IS_STDCALL && !OS(WINDOWS) @@ -327,6 +330,12 @@ private: void* m_value; }; + +FunctionPtr::FunctionPtr(MacroAssemblerCodePtr ptr) + : m_value(ptr.executableAddress()) +{ +} + // MacroAssemblerCodeRef: // // A reference to a section of JIT generated code. A CodeRef consists of a diff --git a/src/3rdparty/masm/assembler/MacroAssemblerMIPS.h b/src/3rdparty/masm/assembler/MacroAssemblerMIPS.h index f2ad6a4470..07f0ec623f 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerMIPS.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerMIPS.h @@ -27,6 +27,8 @@ #ifndef MacroAssemblerMIPS_h #define MacroAssemblerMIPS_h +#include <Platform.h> + #if ENABLE(ASSEMBLER) && CPU(MIPS) #include "AbstractMacroAssembler.h" @@ -268,6 +270,18 @@ public: m_assembler.sw(dataTempRegister, addrTempRegister, 4); } + void getEffectiveAddress(BaseIndex address, RegisterID dest) + { + if (!address.scale && !m_fixedWidth) + m_assembler.addu(dest, address.index, address.base); + else { + m_assembler.sll(addrTempRegister, address.index, address.scale); + m_assembler.addu(dest, addrTempRegister, address.base); + } + if (address.offset) + add32(TrustedImm32(address.offset), dest); + } + void and32(Address src, RegisterID dest) { load32(src, dataTempRegister); diff --git a/src/3rdparty/masm/assembler/MacroAssemblerSH4.h b/src/3rdparty/masm/assembler/MacroAssemblerSH4.h deleted file mode 100644 index 1e5a3113bb..0000000000 --- a/src/3rdparty/masm/assembler/MacroAssemblerSH4.h +++ /dev/null @@ -1,2293 +0,0 @@ -/* - * Copyright (C) 2009-2011 STMicroelectronics. 
All rights reserved. - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef MacroAssemblerSH4_h -#define MacroAssemblerSH4_h - -#if ENABLE(ASSEMBLER) && CPU(SH4) - -#include "SH4Assembler.h" -#include "AbstractMacroAssembler.h" -#include <wtf/Assertions.h> - -namespace JSC { - -class MacroAssemblerSH4 : public AbstractMacroAssembler<SH4Assembler> { -public: - typedef SH4Assembler::FPRegisterID FPRegisterID; - - static const Scale ScalePtr = TimesFour; - static const FPRegisterID fscratch = SH4Registers::fr10; - static const RegisterID stackPointerRegister = SH4Registers::sp; - static const RegisterID linkRegister = SH4Registers::pr; - static const RegisterID scratchReg3 = SH4Registers::r13; - - static const int MaximumCompactPtrAlignedAddressOffset = 60; - - static bool isCompactPtrAlignedAddressOffset(ptrdiff_t value) - { - return (value >= 0) && (value <= MaximumCompactPtrAlignedAddressOffset); - } - - enum RelationalCondition { - Equal = SH4Assembler::EQ, - NotEqual = SH4Assembler::NE, - Above = SH4Assembler::HI, - AboveOrEqual = SH4Assembler::HS, - Below = SH4Assembler::LI, - BelowOrEqual = SH4Assembler::LS, - GreaterThan = SH4Assembler::GT, - GreaterThanOrEqual = SH4Assembler::GE, - LessThan = SH4Assembler::LT, - LessThanOrEqual = SH4Assembler::LE - }; - - enum ResultCondition { - Overflow = SH4Assembler::OF, - Signed = SH4Assembler::SI, - Zero = SH4Assembler::EQ, - NonZero = SH4Assembler::NE - }; - - enum DoubleCondition { - // These conditions will only evaluate to true if the comparison is ordered - i.e. neither operand is NaN. - DoubleEqual = SH4Assembler::EQ, - DoubleNotEqual = SH4Assembler::NE, - DoubleGreaterThan = SH4Assembler::GT, - DoubleGreaterThanOrEqual = SH4Assembler::GE, - DoubleLessThan = SH4Assembler::LT, - DoubleLessThanOrEqual = SH4Assembler::LE, - // If either operand is NaN, these conditions always evaluate to true. 
- DoubleEqualOrUnordered = SH4Assembler::EQU, - DoubleNotEqualOrUnordered = SH4Assembler::NEU, - DoubleGreaterThanOrUnordered = SH4Assembler::GTU, - DoubleGreaterThanOrEqualOrUnordered = SH4Assembler::GEU, - DoubleLessThanOrUnordered = SH4Assembler::LTU, - DoubleLessThanOrEqualOrUnordered = SH4Assembler::LEU, - }; - - RegisterID claimScratch() - { - return m_assembler.claimScratch(); - } - - void releaseScratch(RegisterID reg) - { - m_assembler.releaseScratch(reg); - } - - // Integer arithmetic operations - - void add32(RegisterID src, RegisterID dest) - { - m_assembler.addlRegReg(src, dest); - } - - void add32(TrustedImm32 imm, RegisterID dest) - { - if (m_assembler.isImmediate(imm.m_value)) { - m_assembler.addlImm8r(imm.m_value, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm.m_value, scr); - m_assembler.addlRegReg(scr, dest); - releaseScratch(scr); - } - - void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - if (src != dest) - m_assembler.movlRegReg(src, dest); - add32(imm, dest); - } - - void add32(TrustedImm32 imm, Address address) - { - RegisterID scr = claimScratch(); - load32(address, scr); - add32(imm, scr); - store32(scr, address); - releaseScratch(scr); - } - - void add32(Address src, RegisterID dest) - { - RegisterID scr = claimScratch(); - load32(src, scr); - m_assembler.addlRegReg(scr, dest); - releaseScratch(scr); - } - - void add32(AbsoluteAddress src, RegisterID dest) - { - RegisterID scr = claimScratch(); - load32(src.m_ptr, scr); - m_assembler.addlRegReg(scr, dest); - releaseScratch(scr); - } - - void and32(RegisterID src, RegisterID dest) - { - m_assembler.andlRegReg(src, dest); - } - - void and32(TrustedImm32 imm, RegisterID dest) - { - if ((imm.m_value <= 255) && (imm.m_value >= 0) && (dest == SH4Registers::r0)) { - m_assembler.andlImm8r(imm.m_value, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant((imm.m_value), scr); - m_assembler.andlRegReg(scr, 
dest); - releaseScratch(scr); - } - - void and32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - if (src != dest) { - move(imm, dest); - and32(src, dest); - return; - } - - and32(imm, dest); - } - - void lshift32(RegisterID shiftamount, RegisterID dest) - { - if (shiftamount == SH4Registers::r0) - m_assembler.andlImm8r(0x1f, shiftamount); - else { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(0x1f, scr); - m_assembler.andlRegReg(scr, shiftamount); - releaseScratch(scr); - } - m_assembler.shllRegReg(dest, shiftamount); - } - - void rshift32(int imm, RegisterID dest) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(-imm, scr); - m_assembler.shaRegReg(dest, scr); - releaseScratch(scr); - } - - void lshift32(TrustedImm32 imm, RegisterID dest) - { - if (!imm.m_value) - return; - - if ((imm.m_value == 1) || (imm.m_value == 2) || (imm.m_value == 8) || (imm.m_value == 16)) { - m_assembler.shllImm8r(imm.m_value, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant((imm.m_value & 0x1f) , scr); - m_assembler.shllRegReg(dest, scr); - releaseScratch(scr); - } - - void lshift32(RegisterID src, TrustedImm32 shiftamount, RegisterID dest) - { - if (src != dest) - move(src, dest); - - lshift32(shiftamount, dest); - } - - void mul32(RegisterID src, RegisterID dest) - { - m_assembler.imullRegReg(src, dest); - m_assembler.stsmacl(dest); - } - - void mul32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - RegisterID scr = claimScratch(); - move(imm, scr); - if (src != dest) - move(src, dest); - mul32(scr, dest); - releaseScratch(scr); - } - - void or32(RegisterID src, RegisterID dest) - { - m_assembler.orlRegReg(src, dest); - } - - void or32(TrustedImm32 imm, RegisterID dest) - { - if ((imm.m_value <= 255) && (imm.m_value >= 0) && (dest == SH4Registers::r0)) { - m_assembler.orlImm8r(imm.m_value, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm.m_value, scr); - 
m_assembler.orlRegReg(scr, dest); - releaseScratch(scr); - } - - void or32(RegisterID op1, RegisterID op2, RegisterID dest) - { - if (op1 == op2) - move(op1, dest); - else if (op1 == dest) - or32(op2, dest); - else { - move(op2, dest); - or32(op1, dest); - } - } - - -void or32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - if (src != dest) { - move(imm, dest); - or32(src, dest); - return; - } - - or32(imm, dest); - } - - void xor32(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - if (src != dest) { - move(imm, dest); - xor32(src, dest); - return; - } - - xor32(imm, dest); - } - - void rshift32(RegisterID shiftamount, RegisterID dest) - { - if (shiftamount == SH4Registers::r0) - m_assembler.andlImm8r(0x1f, shiftamount); - else { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(0x1f, scr); - m_assembler.andlRegReg(scr, shiftamount); - releaseScratch(scr); - } - m_assembler.neg(shiftamount, shiftamount); - m_assembler.shaRegReg(dest, shiftamount); - } - - void rshift32(TrustedImm32 imm, RegisterID dest) - { - if (imm.m_value & 0x1f) - rshift32(imm.m_value & 0x1f, dest); - } - - void rshift32(RegisterID src, TrustedImm32 imm, RegisterID dest) - { - if (src != dest) - move(src, dest); - rshift32(imm, dest); - } - - void sub32(RegisterID src, RegisterID dest) - { - m_assembler.sublRegReg(src, dest); - } - - void sub32(TrustedImm32 imm, AbsoluteAddress address, RegisterID scratchReg) - { - RegisterID result = claimScratch(); - - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scratchReg); - m_assembler.movlMemReg(scratchReg, result); - - if (m_assembler.isImmediate(-imm.m_value)) - m_assembler.addlImm8r(-imm.m_value, result); - else { - m_assembler.loadConstant(imm.m_value, scratchReg3); - m_assembler.sublRegReg(scratchReg3, result); - } - - store32(result, scratchReg); - releaseScratch(result); - } - - void sub32(TrustedImm32 imm, AbsoluteAddress address) - { - RegisterID result = claimScratch(); - RegisterID scratchReg 
= claimScratch(); - - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scratchReg); - m_assembler.movlMemReg(scratchReg, result); - - if (m_assembler.isImmediate(-imm.m_value)) - m_assembler.addlImm8r(-imm.m_value, result); - else { - m_assembler.loadConstant(imm.m_value, scratchReg3); - m_assembler.sublRegReg(scratchReg3, result); - } - - store32(result, scratchReg); - releaseScratch(result); - releaseScratch(scratchReg); - } - - void add32(TrustedImm32 imm, AbsoluteAddress address, RegisterID scratchReg) - { - RegisterID result = claimScratch(); - - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scratchReg); - m_assembler.movlMemReg(scratchReg, result); - - if (m_assembler.isImmediate(imm.m_value)) - m_assembler.addlImm8r(imm.m_value, result); - else { - m_assembler.loadConstant(imm.m_value, scratchReg3); - m_assembler.addlRegReg(scratchReg3, result); - } - - store32(result, scratchReg); - releaseScratch(result); - } - - void add32(TrustedImm32 imm, AbsoluteAddress address) - { - RegisterID result = claimScratch(); - RegisterID scratchReg = claimScratch(); - - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scratchReg); - m_assembler.movlMemReg(scratchReg, result); - - if (m_assembler.isImmediate(imm.m_value)) - m_assembler.addlImm8r(imm.m_value, result); - else { - m_assembler.loadConstant(imm.m_value, scratchReg3); - m_assembler.addlRegReg(scratchReg3, result); - } - - store32(result, scratchReg); - releaseScratch(result); - releaseScratch(scratchReg); - } - - void add64(TrustedImm32 imm, AbsoluteAddress address) - { - RegisterID scr1 = claimScratch(); - RegisterID scr2 = claimScratch(); - - // Add 32-bit LSB first. 
- m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scr1); - m_assembler.movlMemReg(scr1, scr1); // scr1 = 32-bit LSB of int64 @ address - m_assembler.loadConstant(imm.m_value, scr2); - m_assembler.clrt(); - m_assembler.addclRegReg(scr1, scr2); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr), scr1); - m_assembler.movlRegMem(scr2, scr1); // Update address with 32-bit LSB result. - - // Then add 32-bit MSB. - m_assembler.addlImm8r(4, scr1); - m_assembler.movlMemReg(scr1, scr1); // scr1 = 32-bit MSB of int64 @ address - m_assembler.movt(scr2); - if (imm.m_value < 0) - m_assembler.addlImm8r(-1, scr2); // Sign extend imm value if needed. - m_assembler.addvlRegReg(scr2, scr1); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address.m_ptr) + 4, scr2); - m_assembler.movlRegMem(scr1, scr2); // Update (address + 4) with 32-bit MSB result. - - releaseScratch(scr2); - releaseScratch(scr1); - } - - void sub32(TrustedImm32 imm, RegisterID dest) - { - if (m_assembler.isImmediate(-imm.m_value)) { - m_assembler.addlImm8r(-imm.m_value, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm.m_value, scr); - m_assembler.sublRegReg(scr, dest); - releaseScratch(scr); - } - - void sub32(Address src, RegisterID dest) - { - RegisterID scr = claimScratch(); - load32(src, scr); - m_assembler.sublRegReg(scr, dest); - releaseScratch(scr); - } - - void xor32(RegisterID src, RegisterID dest) - { - m_assembler.xorlRegReg(src, dest); - } - - void xor32(TrustedImm32 imm, RegisterID srcDest) - { - if (imm.m_value == -1) { - m_assembler.notlReg(srcDest, srcDest); - return; - } - - if ((srcDest != SH4Registers::r0) || (imm.m_value > 255) || (imm.m_value < 0)) { - RegisterID scr = claimScratch(); - m_assembler.loadConstant((imm.m_value), scr); - m_assembler.xorlRegReg(scr, srcDest); - releaseScratch(scr); - return; - } - - m_assembler.xorlImm8r(imm.m_value, srcDest); - } - - void compare32(int imm, RegisterID dst, 
RelationalCondition cond) - { - if (((cond == Equal) || (cond == NotEqual)) && (dst == SH4Registers::r0) && m_assembler.isImmediate(imm)) { - m_assembler.cmpEqImmR0(imm, dst); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm, scr); - m_assembler.cmplRegReg(scr, dst, SH4Condition(cond)); - releaseScratch(scr); - } - - void compare32(int offset, RegisterID base, RegisterID left, RelationalCondition cond) - { - RegisterID scr = claimScratch(); - if (!offset) { - m_assembler.movlMemReg(base, scr); - m_assembler.cmplRegReg(scr, left, SH4Condition(cond)); - releaseScratch(scr); - return; - } - - if ((offset < 0) || (offset >= 64)) { - m_assembler.loadConstant(offset, scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movlMemReg(scr, scr); - m_assembler.cmplRegReg(scr, left, SH4Condition(cond)); - releaseScratch(scr); - return; - } - - m_assembler.movlMemReg(offset >> 2, base, scr); - m_assembler.cmplRegReg(scr, left, SH4Condition(cond)); - releaseScratch(scr); - } - - void testImm(int imm, int offset, RegisterID base) - { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - - if ((offset < 0) || (offset >= 64)) { - m_assembler.loadConstant(offset, scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movlMemReg(scr, scr); - } else if (offset) - m_assembler.movlMemReg(offset >> 2, base, scr); - else - m_assembler.movlMemReg(base, scr); - if (m_assembler.isImmediate(imm)) - m_assembler.movImm8(imm, scr1); - else - m_assembler.loadConstant(imm, scr1); - - m_assembler.testlRegReg(scr, scr1); - releaseScratch(scr); - releaseScratch(scr1); - } - - void testlImm(int imm, RegisterID dst) - { - if ((dst == SH4Registers::r0) && (imm <= 255) && (imm >= 0)) { - m_assembler.testlImm8r(imm, dst); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm, scr); - m_assembler.testlRegReg(scr, dst); - releaseScratch(scr); - } - - void compare32(RegisterID right, int offset, RegisterID base, 
RelationalCondition cond) - { - if (!offset) { - RegisterID scr = claimScratch(); - m_assembler.movlMemReg(base, scr); - m_assembler.cmplRegReg(right, scr, SH4Condition(cond)); - releaseScratch(scr); - return; - } - - if ((offset < 0) || (offset >= 64)) { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(offset, scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movlMemReg(scr, scr); - m_assembler.cmplRegReg(right, scr, SH4Condition(cond)); - releaseScratch(scr); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.movlMemReg(offset >> 2, base, scr); - m_assembler.cmplRegReg(right, scr, SH4Condition(cond)); - releaseScratch(scr); - } - - void compare32(int imm, int offset, RegisterID base, RelationalCondition cond) - { - if (!offset) { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - m_assembler.movlMemReg(base, scr); - m_assembler.loadConstant(imm, scr1); - m_assembler.cmplRegReg(scr1, scr, SH4Condition(cond)); - releaseScratch(scr1); - releaseScratch(scr); - return; - } - - if ((offset < 0) || (offset >= 64)) { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - m_assembler.loadConstant(offset, scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movlMemReg(scr, scr); - m_assembler.loadConstant(imm, scr1); - m_assembler.cmplRegReg(scr1, scr, SH4Condition(cond)); - releaseScratch(scr1); - releaseScratch(scr); - return; - } - - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - m_assembler.movlMemReg(offset >> 2, base, scr); - m_assembler.loadConstant(imm, scr1); - m_assembler.cmplRegReg(scr1, scr, SH4Condition(cond)); - releaseScratch(scr1); - releaseScratch(scr); - } - - // Memory access operation - - void load32(ImplicitAddress address, RegisterID dest) - { - load32(address.base, address.offset, dest); - } - - void load8(ImplicitAddress address, RegisterID dest) - { - load8(address.base, address.offset, dest); - } - - void load8(BaseIndex address, RegisterID dest) 
- { - RegisterID scr = claimScratch(); - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - load8(scr, address.offset, dest); - releaseScratch(scr); - } - - void load8Signed(BaseIndex address, RegisterID dest) - { - RegisterID scr = claimScratch(); - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - load8Signed(scr, address.offset, dest); - releaseScratch(scr); - } - - void load32(BaseIndex address, RegisterID dest) - { - RegisterID scr = claimScratch(); - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - load32(scr, address.offset, dest); - releaseScratch(scr); - } - - void load32(const void* address, RegisterID dest) - { - m_assembler.loadConstant(reinterpret_cast<uint32_t>(const_cast<void*>(address)), dest); - m_assembler.movlMemReg(dest, dest); - } - - void load32(RegisterID base, int offset, RegisterID dest) - { - if (!offset) { - m_assembler.movlMemReg(base, dest); - return; - } - - if ((offset >= 0) && (offset < 64)) { - m_assembler.movlMemReg(offset >> 2, base, dest); - return; - } - - if ((dest == SH4Registers::r0) && (dest != base)) { - m_assembler.loadConstant((offset), dest); - m_assembler.movlR0mr(base, dest); - return; - } - - RegisterID scr; - if (dest == base) - scr = claimScratch(); - else - scr = dest; - m_assembler.loadConstant((offset), scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movlMemReg(scr, dest); - - if (dest == base) - releaseScratch(scr); - } - - void load8Signed(RegisterID base, int offset, RegisterID dest) - { - if (!offset) { - m_assembler.movbMemReg(base, dest); - return; - } - - if ((offset > 0) && (offset < 64) && (dest == SH4Registers::r0)) { - m_assembler.movbMemReg(offset, base, dest); - return; - } - - if (base != dest) { - m_assembler.loadConstant((offset), dest); - m_assembler.addlRegReg(base, dest); - m_assembler.movbMemReg(dest, dest); - return; - } - - 
RegisterID scr = claimScratch(); - m_assembler.loadConstant((offset), scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movbMemReg(scr, dest); - releaseScratch(scr); - } - - void load8(RegisterID base, int offset, RegisterID dest) - { - if (!offset) { - m_assembler.movbMemReg(base, dest); - m_assembler.extub(dest, dest); - return; - } - - if ((offset > 0) && (offset < 64) && (dest == SH4Registers::r0)) { - m_assembler.movbMemReg(offset, base, dest); - m_assembler.extub(dest, dest); - return; - } - - if (base != dest) { - m_assembler.loadConstant((offset), dest); - m_assembler.addlRegReg(base, dest); - m_assembler.movbMemReg(dest, dest); - m_assembler.extub(dest, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant((offset), scr); - m_assembler.addlRegReg(base, scr); - m_assembler.movbMemReg(scr, dest); - m_assembler.extub(dest, dest); - releaseScratch(scr); - } - - void load32(RegisterID r0, RegisterID src, RegisterID dst) - { - ASSERT(r0 == SH4Registers::r0); - m_assembler.movlR0mr(src, dst); - } - - void load32(RegisterID src, RegisterID dst) - { - m_assembler.movlMemReg(src, dst); - } - - void load16(ImplicitAddress address, RegisterID dest) - { - if (!address.offset) { - m_assembler.movwMemReg(address.base, dest); - extuw(dest, dest); - return; - } - - if ((address.offset > 0) && (address.offset < 64) && (dest == SH4Registers::r0)) { - m_assembler.movwMemReg(address.offset, address.base, dest); - extuw(dest, dest); - return; - } - - if (address.base != dest) { - m_assembler.loadConstant((address.offset), dest); - m_assembler.addlRegReg(address.base, dest); - m_assembler.movwMemReg(dest, dest); - extuw(dest, dest); - return; - } - - RegisterID scr = claimScratch(); - m_assembler.loadConstant((address.offset), scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movwMemReg(scr, dest); - extuw(dest, dest); - releaseScratch(scr); - } - - void load16Unaligned(BaseIndex address, RegisterID dest) - { - - RegisterID 
scr = claimScratch(); - RegisterID scr1 = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - add32(address.base, scr); - load8(scr, scr1); - add32(TrustedImm32(1), scr); - load8(scr, dest); - m_assembler.shllImm8r(8, dest); - or32(scr1, dest); - - releaseScratch(scr); - releaseScratch(scr1); - } - - void load16(RegisterID src, RegisterID dest) - { - m_assembler.movwMemReg(src, dest); - extuw(dest, dest); - } - - void load16Signed(RegisterID src, RegisterID dest) - { - m_assembler.movwMemReg(src, dest); - } - - void load16(RegisterID r0, RegisterID src, RegisterID dest) - { - ASSERT(r0 == SH4Registers::r0); - m_assembler.movwR0mr(src, dest); - extuw(dest, dest); - } - - void load16Signed(RegisterID r0, RegisterID src, RegisterID dest) - { - ASSERT(r0 == SH4Registers::r0); - m_assembler.movwR0mr(src, dest); - } - - void load16(BaseIndex address, RegisterID dest) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - - if (address.offset) - add32(TrustedImm32(address.offset), scr); - if (address.base == SH4Registers::r0) - load16(address.base, scr, dest); - else { - add32(address.base, scr); - load16(scr, dest); - } - - releaseScratch(scr); - } - - void load16Signed(BaseIndex address, RegisterID dest) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - - if (address.offset) - add32(TrustedImm32(address.offset), scr); - if (address.base == SH4Registers::r0) - load16Signed(address.base, scr, dest); - else { - add32(address.base, scr); - load16Signed(scr, dest); - } - - releaseScratch(scr); - } - - void store8(RegisterID src, BaseIndex address) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - - m_assembler.movbRegMem(src, scr); - - 
releaseScratch(scr); - } - - void store16(RegisterID src, BaseIndex address) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - - m_assembler.movwRegMem(src, scr); - - releaseScratch(scr); - } - - void store32(RegisterID src, ImplicitAddress address) - { - RegisterID scr = claimScratch(); - store32(src, address.offset, address.base, scr); - releaseScratch(scr); - } - - void store32(RegisterID src, int offset, RegisterID base, RegisterID scr) - { - if (!offset) { - m_assembler.movlRegMem(src, base); - return; - } - - if ((offset >=0) && (offset < 64)) { - m_assembler.movlRegMem(src, offset >> 2, base); - return; - } - - m_assembler.loadConstant((offset), scr); - if (scr == SH4Registers::r0) { - m_assembler.movlRegMemr0(src, base); - return; - } - - m_assembler.addlRegReg(base, scr); - m_assembler.movlRegMem(src, scr); - } - - void store32(RegisterID src, RegisterID offset, RegisterID base) - { - ASSERT(offset == SH4Registers::r0); - m_assembler.movlRegMemr0(src, base); - } - - void store32(RegisterID src, RegisterID dst) - { - m_assembler.movlRegMem(src, dst); - } - - void store32(TrustedImm32 imm, ImplicitAddress address) - { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - m_assembler.loadConstant((imm.m_value), scr); - store32(scr, address.offset, address.base, scr1); - releaseScratch(scr); - releaseScratch(scr1); - } - - void store32(RegisterID src, BaseIndex address) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - store32(src, Address(scr, address.offset)); - - releaseScratch(scr); - } - - void store32(TrustedImm32 imm, void* address) - { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - m_assembler.loadConstant((imm.m_value), scr); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address), scr1); - 
m_assembler.movlRegMem(scr, scr1); - releaseScratch(scr); - releaseScratch(scr1); - } - - void store32(RegisterID src, void* address) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address), scr); - m_assembler.movlRegMem(src, scr); - releaseScratch(scr); - } - - DataLabel32 load32WithAddressOffsetPatch(Address address, RegisterID dest) - { - RegisterID scr = claimScratch(); - DataLabel32 label(this); - m_assembler.loadConstantUnReusable(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movlMemReg(scr, dest); - releaseScratch(scr); - return label; - } - - DataLabel32 store32WithAddressOffsetPatch(RegisterID src, Address address) - { - RegisterID scr = claimScratch(); - DataLabel32 label(this); - m_assembler.loadConstantUnReusable(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movlRegMem(src, scr); - releaseScratch(scr); - return label; - } - - DataLabelCompact load32WithCompactAddressOffsetPatch(Address address, RegisterID dest) - { - DataLabelCompact dataLabel(this); - ASSERT(address.offset <= MaximumCompactPtrAlignedAddressOffset); - ASSERT(address.offset >= 0); - m_assembler.movlMemRegCompact(address.offset >> 2, address.base, dest); - return dataLabel; - } - - ConvertibleLoadLabel convertibleLoadPtr(Address address, RegisterID dest) - { - ConvertibleLoadLabel result(this); - - RegisterID scr = claimScratch(); - m_assembler.movImm8(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movlMemReg(scr, dest); - releaseScratch(scr); - - return result; - } - - // Floating-point operations - - static bool supportsFloatingPoint() { return true; } - static bool supportsFloatingPointTruncate() { return true; } - static bool supportsFloatingPointSqrt() { return true; } - static bool supportsFloatingPointAbs() { return false; } - - void moveDoubleToInts(FPRegisterID src, RegisterID dest1, RegisterID dest2) - { - 
m_assembler.fldsfpul((FPRegisterID)(src + 1)); - m_assembler.stsfpulReg(dest1); - m_assembler.fldsfpul(src); - m_assembler.stsfpulReg(dest2); - } - - void moveIntsToDouble(RegisterID src1, RegisterID src2, FPRegisterID dest, FPRegisterID scratch) - { - UNUSED_PARAM(scratch); - m_assembler.ldsrmfpul(src1); - m_assembler.fstsfpul((FPRegisterID)(dest + 1)); - m_assembler.ldsrmfpul(src2); - m_assembler.fstsfpul(dest); - } - - void loadFloat(BaseIndex address, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - m_assembler.fmovsReadrm(scr, dest); - releaseScratch(scr); - } - - void loadDouble(BaseIndex address, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - m_assembler.fmovsReadrminc(scr, (FPRegisterID)(dest + 1)); - m_assembler.fmovsReadrm(scr, dest); - releaseScratch(scr); - } - - void loadDouble(ImplicitAddress address, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - - m_assembler.loadConstant(address.offset, scr); - if (address.base == SH4Registers::r0) { - m_assembler.fmovsReadr0r(scr, (FPRegisterID)(dest + 1)); - m_assembler.addlImm8r(4, scr); - m_assembler.fmovsReadr0r(scr, dest); - releaseScratch(scr); - return; - } - - m_assembler.addlRegReg(address.base, scr); - m_assembler.fmovsReadrminc(scr, (FPRegisterID)(dest + 1)); - m_assembler.fmovsReadrm(scr, dest); - releaseScratch(scr); - } - - void loadDouble(const void* address, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(address), scr); - m_assembler.fmovsReadrminc(scr, (FPRegisterID)(dest + 1)); - m_assembler.fmovsReadrm(scr, dest); - releaseScratch(scr); - } - - void 
storeFloat(FPRegisterID src, BaseIndex address) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - m_assembler.fmovsWriterm(src, scr); - - releaseScratch(scr); - } - - void storeDouble(FPRegisterID src, ImplicitAddress address) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.fmovsWriterm((FPRegisterID)(src + 1), scr); - m_assembler.addlImm8r(4, scr); - m_assembler.fmovsWriterm(src, scr); - releaseScratch(scr); - } - - void storeDouble(FPRegisterID src, BaseIndex address) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - m_assembler.fmovsWriterm((FPRegisterID)(src + 1), scr); - m_assembler.addlImm8r(4, scr); - m_assembler.fmovsWriterm(src, scr); - - releaseScratch(scr); - } - - void addDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) - { - if (op1 == dest) - m_assembler.daddRegReg(op2, dest); - else { - m_assembler.dmovRegReg(op1, dest); - m_assembler.daddRegReg(op2, dest); - } - } - - void addDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.daddRegReg(src, dest); - } - - void addDouble(AbsoluteAddress address, FPRegisterID dest) - { - loadDouble(address.m_ptr, fscratch); - addDouble(fscratch, dest); - } - - void addDouble(Address address, FPRegisterID dest) - { - loadDouble(address, fscratch); - addDouble(fscratch, dest); - } - - void subDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.dsubRegReg(src, dest); - } - - void subDouble(Address address, FPRegisterID dest) - { - loadDouble(address, fscratch); - subDouble(fscratch, dest); - } - - void mulDouble(FPRegisterID src, FPRegisterID dest) - { - 
m_assembler.dmulRegReg(src, dest); - } - - void mulDouble(Address address, FPRegisterID dest) - { - loadDouble(address, fscratch); - mulDouble(fscratch, dest); - } - - void divDouble(FPRegisterID src, FPRegisterID dest) - { - m_assembler.ddivRegReg(src, dest); - } - - void convertFloatToDouble(FPRegisterID src, FPRegisterID dst) - { - m_assembler.fldsfpul(src); - m_assembler.dcnvsd(dst); - } - - void convertDoubleToFloat(FPRegisterID src, FPRegisterID dst) - { - m_assembler.dcnvds(src); - m_assembler.fstsfpul(dst); - } - - void convertInt32ToDouble(RegisterID src, FPRegisterID dest) - { - m_assembler.ldsrmfpul(src); - m_assembler.floatfpulDreg(dest); - } - - void convertInt32ToDouble(AbsoluteAddress src, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(reinterpret_cast<uint32_t>(src.m_ptr), scr); - convertInt32ToDouble(scr, dest); - releaseScratch(scr); - } - - void convertInt32ToDouble(Address src, FPRegisterID dest) - { - RegisterID scr = claimScratch(); - load32(src, scr); - convertInt32ToDouble(scr, dest); - releaseScratch(scr); - } - - void load32WithUnalignedHalfWords(BaseIndex address, RegisterID dest) - { - RegisterID scr = claimScratch(); - RegisterID scr1 = claimScratch(); - Jump m_jump; - JumpList end; - - if (dest != SH4Registers::r0) - move(SH4Registers::r0, scr1); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - - if (address.offset) - add32(TrustedImm32(address.offset), scr); - - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 68, sizeof(uint32_t)); - move(scr, SH4Registers::r0); - m_assembler.andlImm8r(0x3, SH4Registers::r0); - m_assembler.cmpEqImmR0(0x0, SH4Registers::r0); - m_jump = Jump(m_assembler.jne(), SH4Assembler::JumpNear); - if (dest != SH4Registers::r0) - move(scr1, SH4Registers::r0); - - load32(scr, dest); - end.append(Jump(m_assembler.bra(), SH4Assembler::JumpNear)); - m_assembler.nop(); - m_jump.link(this); - m_assembler.andlImm8r(0x1, 
SH4Registers::r0); - m_assembler.cmpEqImmR0(0x0, SH4Registers::r0); - - if (dest != SH4Registers::r0) - move(scr1, SH4Registers::r0); - - m_jump = Jump(m_assembler.jne(), SH4Assembler::JumpNear); - load16(scr, scr1); - add32(TrustedImm32(2), scr); - load16(scr, dest); - m_assembler.shllImm8r(16, dest); - or32(scr1, dest); - end.append(Jump(m_assembler.bra(), SH4Assembler::JumpNear)); - m_assembler.nop(); - m_jump.link(this); - load8(scr, scr1); - add32(TrustedImm32(1), scr); - load16(scr, dest); - m_assembler.shllImm8r(8, dest); - or32(dest, scr1); - add32(TrustedImm32(2), scr); - load8(scr, dest); - m_assembler.shllImm8r(8, dest); - m_assembler.shllImm8r(16, dest); - or32(scr1, dest); - end.link(this); - - releaseScratch(scr); - releaseScratch(scr1); - } - - Jump branch32WithUnalignedHalfWords(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - RegisterID scr = scratchReg3; - load32WithUnalignedHalfWords(left, scr); - if (((cond == Equal) || (cond == NotEqual)) && !right.m_value) - m_assembler.testlRegReg(scr, scr); - else - compare32(right.m_value, scr, cond); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branchDoubleNonZero(FPRegisterID reg, FPRegisterID scratch) - { - m_assembler.movImm8(0, scratchReg3); - convertInt32ToDouble(scratchReg3, scratch); - return branchDouble(DoubleNotEqual, reg, scratch); - } - - Jump branchDoubleZeroOrNaN(FPRegisterID reg, FPRegisterID scratch) - { - m_assembler.movImm8(0, scratchReg3); - convertInt32ToDouble(scratchReg3, scratch); - return branchDouble(DoubleEqualOrUnordered, reg, scratch); - } - - Jump branchDouble(DoubleCondition cond, FPRegisterID left, FPRegisterID right) - { - if (cond == DoubleEqual) { - m_assembler.dcmppeq(right, left); - return branchTrue(); - } - - if (cond == DoubleNotEqual) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - 
m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppeq(right, left); - releaseScratch(scr); - Jump m_jump = branchFalse(); - end.link(this); - return m_jump; - } - - if (cond == DoubleGreaterThan) { - m_assembler.dcmppgt(right, left); - return branchTrue(); - } - - if (cond == DoubleGreaterThanOrEqual) { - m_assembler.dcmppgt(left, right); - return branchFalse(); - } - - if (cond == DoubleLessThan) { - m_assembler.dcmppgt(left, right); - return branchTrue(); - } - - if (cond == DoubleLessThanOrEqual) { - m_assembler.dcmppgt(right, left); - return branchFalse(); - } - - if (cond == DoubleEqualOrUnordered) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppeq(left, right); - Jump m_jump = Jump(m_assembler.je()); - end.link(this); - m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - if (cond == DoubleGreaterThanOrUnordered) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - 
m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppgt(right, left); - Jump m_jump = Jump(m_assembler.je()); - end.link(this); - m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - if (cond == DoubleGreaterThanOrEqualOrUnordered) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppgt(left, right); - Jump m_jump = Jump(m_assembler.jne()); - end.link(this); - m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - if (cond == DoubleLessThanOrUnordered) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppgt(left, right); - Jump m_jump = Jump(m_assembler.je()); - end.link(this); - 
m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - if (cond == DoubleLessThanOrEqualOrUnordered) { - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppgt(right, left); - Jump m_jump = Jump(m_assembler.jne()); - end.link(this); - m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - ASSERT(cond == DoubleNotEqualOrUnordered); - RegisterID scr = claimScratch(); - JumpList end; - m_assembler.loadConstant(0x7fbfffff, scratchReg3); - m_assembler.dcnvds(right); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 22, sizeof(uint32_t)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcnvds(left); - m_assembler.stsfpulReg(scr); - m_assembler.cmplRegReg(scratchReg3, scr, SH4Condition(Equal)); - end.append(Jump(m_assembler.je(), SH4Assembler::JumpNear)); - m_assembler.dcmppeq(right, left); - Jump m_jump = Jump(m_assembler.jne()); - end.link(this); - m_assembler.extraInstrForBranch(scr); - releaseScratch(scr); - return m_jump; - } - - Jump branchTrue() - { - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 6, sizeof(uint32_t)); - Jump m_jump = Jump(m_assembler.je()); - m_assembler.extraInstrForBranch(scratchReg3); - return m_jump; - } - - Jump branchFalse() - { - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 6, sizeof(uint32_t)); - Jump m_jump = 
Jump(m_assembler.jne()); - m_assembler.extraInstrForBranch(scratchReg3); - return m_jump; - } - - Jump branch32(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - RegisterID scr = claimScratch(); - move(left.index, scr); - lshift32(TrustedImm32(left.scale), scr); - add32(left.base, scr); - load32(scr, left.offset, scr); - compare32(right.m_value, scr, cond); - releaseScratch(scr); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - void sqrtDouble(FPRegisterID src, FPRegisterID dest) - { - if (dest != src) - m_assembler.dmovRegReg(src, dest); - m_assembler.dsqrt(dest); - } - - void absDouble(FPRegisterID, FPRegisterID) - { - RELEASE_ASSERT_NOT_REACHED(); - } - - Jump branchTest8(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1)) - { - RegisterID addressTempRegister = claimScratch(); - load8(address, addressTempRegister); - Jump jmp = branchTest32(cond, addressTempRegister, mask); - releaseScratch(addressTempRegister); - return jmp; - } - - Jump branchTest8(ResultCondition cond, AbsoluteAddress address, TrustedImm32 mask = TrustedImm32(-1)) - { - RegisterID addressTempRegister = claimScratch(); - move(TrustedImmPtr(address.m_ptr), addressTempRegister); - load8(Address(addressTempRegister), addressTempRegister); - Jump jmp = branchTest32(cond, addressTempRegister, mask); - releaseScratch(addressTempRegister); - return jmp; - } - - void signExtend32ToPtr(RegisterID src, RegisterID dest) - { - if (src != dest) - move(src, dest); - } - - Jump branch8(RelationalCondition cond, Address left, TrustedImm32 right) - { - RegisterID addressTempRegister = claimScratch(); - load8(left, addressTempRegister); - Jump jmp = branch32(cond, addressTempRegister, right); - releaseScratch(addressTempRegister); - return jmp; - } - - void compare8(RelationalCondition cond, Address left, TrustedImm32 right, RegisterID dest) - { - RegisterID addressTempRegister = claimScratch(); - load8(left, addressTempRegister); - 
compare32(cond, addressTempRegister, right, dest); - releaseScratch(addressTempRegister); - } - - Jump branchTruncateDoubleToInt32(FPRegisterID src, RegisterID dest) - { - m_assembler.ftrcdrmfpul(src); - m_assembler.stsfpulReg(dest); - m_assembler.loadConstant(0x7fffffff, scratchReg3); - m_assembler.cmplRegReg(dest, scratchReg3, SH4Condition(Equal)); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 14, sizeof(uint32_t)); - m_assembler.branch(BT_OPCODE, 2); - m_assembler.addlImm8r(1, scratchReg3); - m_assembler.cmplRegReg(dest, scratchReg3, SH4Condition(Equal)); - return branchTrue(); - } - - // Stack manipulation operations - - void pop(RegisterID dest) - { - m_assembler.popReg(dest); - } - - void push(RegisterID src) - { - m_assembler.pushReg(src); - } - - void push(Address address) - { - if (!address.offset) { - push(address.base); - return; - } - - if ((address.offset < 0) || (address.offset >= 64)) { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movlMemReg(scr, SH4Registers::sp); - m_assembler.addlImm8r(-4, SH4Registers::sp); - releaseScratch(scr); - return; - } - - m_assembler.movlMemReg(address.offset >> 2, address.base, SH4Registers::sp); - m_assembler.addlImm8r(-4, SH4Registers::sp); - } - - void push(TrustedImm32 imm) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(imm.m_value, scr); - push(scr); - releaseScratch(scr); - } - - // Register move operations - - void move(TrustedImm32 imm, RegisterID dest) - { - m_assembler.loadConstant(imm.m_value, dest); - } - - DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) - { - m_assembler.ensureSpace(m_assembler.maxInstructionSize, sizeof(uint32_t)); - DataLabelPtr dataLabel(this); - m_assembler.loadConstantUnReusable(reinterpret_cast<uint32_t>(initialValue.m_value), dest); - return dataLabel; - } - - void move(RegisterID src, RegisterID dest) - { - if (src != dest) - 
m_assembler.movlRegReg(src, dest); - } - - void move(TrustedImmPtr imm, RegisterID dest) - { - m_assembler.loadConstant(imm.asIntptr(), dest); - } - - void extuw(RegisterID src, RegisterID dst) - { - m_assembler.extuw(src, dst); - } - - void compare32(RelationalCondition cond, RegisterID left, RegisterID right, RegisterID dest) - { - m_assembler.cmplRegReg(right, left, SH4Condition(cond)); - if (cond != NotEqual) { - m_assembler.movt(dest); - return; - } - - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 4); - m_assembler.movImm8(0, dest); - m_assembler.branch(BT_OPCODE, 0); - m_assembler.movImm8(1, dest); - } - - void compare32(RelationalCondition cond, RegisterID left, TrustedImm32 right, RegisterID dest) - { - if (left != dest) { - move(right, dest); - compare32(cond, left, dest, dest); - return; - } - - RegisterID scr = claimScratch(); - move(right, scr); - compare32(cond, left, scr, dest); - releaseScratch(scr); - } - - void test8(ResultCondition cond, Address address, TrustedImm32 mask, RegisterID dest) - { - ASSERT((cond == Zero) || (cond == NonZero)); - - load8(address, dest); - if (mask.m_value == -1) - compare32(0, dest, static_cast<RelationalCondition>(cond)); - else - testlImm(mask.m_value, dest); - if (cond != NonZero) { - m_assembler.movt(dest); - return; - } - - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 4); - m_assembler.movImm8(0, dest); - m_assembler.branch(BT_OPCODE, 0); - m_assembler.movImm8(1, dest); - } - - void loadPtrLinkReg(ImplicitAddress address) - { - RegisterID scr = claimScratch(); - load32(address, scr); - m_assembler.ldspr(scr); - releaseScratch(scr); - } - - Jump branch32(RelationalCondition cond, RegisterID left, RegisterID right) - { - m_assembler.cmplRegReg(right, left, SH4Condition(cond)); - /* BT label => BF off - nop LDR reg - nop braf @reg - nop nop - */ - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, RegisterID left, TrustedImm32 
right) - { - if (((cond == Equal) || (cond == NotEqual)) && !right.m_value) - m_assembler.testlRegReg(left, left); - else - compare32(right.m_value, left, cond); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, RegisterID left, Address right) - { - compare32(right.offset, right.base, left, cond); - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, Address left, RegisterID right) - { - compare32(right, left.offset, left.base, cond); - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, Address left, TrustedImm32 right) - { - compare32(right.m_value, left.offset, left.base, cond); - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, AbsoluteAddress left, RegisterID right) - { - RegisterID scr = claimScratch(); - - move(TrustedImm32(reinterpret_cast<uint32_t>(left.m_ptr)), scr); - m_assembler.cmplRegReg(right, scr, SH4Condition(cond)); - releaseScratch(scr); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch32(RelationalCondition cond, AbsoluteAddress left, TrustedImm32 right) - { - RegisterID addressTempRegister = claimScratch(); - - m_assembler.loadConstant(reinterpret_cast<uint32_t>(left.m_ptr), addressTempRegister); - m_assembler.movlMemReg(addressTempRegister, addressTempRegister); - compare32(right.m_value, addressTempRegister, cond); - releaseScratch(addressTempRegister); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right) - { - ASSERT(!(right.m_value & 0xFFFFFF00)); - RegisterID scr = claimScratch(); - - move(left.index, scr); - lshift32(TrustedImm32(left.scale), scr); - - if (left.offset) - add32(TrustedImm32(left.offset), scr); - add32(left.base, 
scr); - load8(scr, scr); - RegisterID scr1 = claimScratch(); - m_assembler.loadConstant(right.m_value, scr1); - releaseScratch(scr); - releaseScratch(scr1); - - return branch32(cond, scr, scr1); - } - - Jump branchTest32(ResultCondition cond, RegisterID reg, RegisterID mask) - { - ASSERT((cond == Zero) || (cond == NonZero)); - - m_assembler.testlRegReg(reg, mask); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchTest32(ResultCondition cond, RegisterID reg, TrustedImm32 mask = TrustedImm32(-1)) - { - ASSERT((cond == Zero) || (cond == NonZero)); - - if (mask.m_value == -1) - m_assembler.testlRegReg(reg, reg); - else - testlImm(mask.m_value, reg); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchTest32(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1)) - { - ASSERT((cond == Zero) || (cond == NonZero)); - - if (mask.m_value == -1) - compare32(0, address.offset, address.base, static_cast<RelationalCondition>(cond)); - else - testImm(mask.m_value, address.offset, address.base); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchTest32(ResultCondition cond, BaseIndex address, TrustedImm32 mask = TrustedImm32(-1)) - { - RegisterID scr = claimScratch(); - - move(address.index, scr); - lshift32(TrustedImm32(address.scale), scr); - add32(address.base, scr); - load32(scr, address.offset, scr); - - if (mask.m_value == -1) - m_assembler.testlRegReg(scr, scr); - else - testlImm(mask.m_value, scr); - - releaseScratch(scr); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump jump() - { - return Jump(m_assembler.jmp()); - } - - void jump(RegisterID target) - { - m_assembler.jmpReg(target); - } - - void jump(Address address) - { - RegisterID scr = claimScratch(); - - if ((address.offset < 0) || (address.offset >= 64)) { - 
m_assembler.loadConstant(address.offset, scr); - m_assembler.addlRegReg(address.base, scr); - m_assembler.movlMemReg(scr, scr); - } else if (address.offset) - m_assembler.movlMemReg(address.offset >> 2, address.base, scr); - else - m_assembler.movlMemReg(address.base, scr); - m_assembler.jmpReg(scr); - - releaseScratch(scr); - } - - // Arithmetic control flow operations - - Jump branchAdd32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - if (cond == Overflow) { - m_assembler.addvlRegReg(src, dest); - return branchTrue(); - } - - if (cond == Signed) { - m_assembler.addlRegReg(src, dest); - // Check if dest is negative - m_assembler.cmppz(dest); - return branchFalse(); - } - - m_assembler.addlRegReg(src, dest); - compare32(0, dest, Equal); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - move(imm, scratchReg3); - return branchAdd32(cond, scratchReg3, dest); - } - - Jump branchAdd32(ResultCondition cond, RegisterID src, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - if (src != dest) - move(src, dest); - - if (cond == Overflow) { - move(imm, scratchReg3); - m_assembler.addvlRegReg(scratchReg3, dest); - return branchTrue(); - } - - add32(imm, dest); - - if (cond == Signed) { - m_assembler.cmppz(dest); - return branchFalse(); - } - - compare32(0, dest, Equal); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchMul32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - if (cond == Overflow) { - RegisterID scr1 = claimScratch(); 
- RegisterID scr = claimScratch(); - m_assembler.dmullRegReg(src, dest); - m_assembler.stsmacl(dest); - m_assembler.movImm8(-31, scr); - m_assembler.movlRegReg(dest, scr1); - m_assembler.shaRegReg(scr1, scr); - m_assembler.stsmach(scr); - m_assembler.cmplRegReg(scr, scr1, SH4Condition(Equal)); - releaseScratch(scr1); - releaseScratch(scr); - return branchFalse(); - } - - m_assembler.imullRegReg(src, dest); - m_assembler.stsmacl(dest); - if (cond == Signed) { - // Check if dest is negative - m_assembler.cmppz(dest); - return branchFalse(); - } - - compare32(0, dest, static_cast<RelationalCondition>(cond)); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchMul32(ResultCondition cond, TrustedImm32 imm, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - move(imm, scratchReg3); - if (src != dest) - move(src, dest); - - return branchMul32(cond, scratchReg3, dest); - } - - Jump branchSub32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - if (cond == Overflow) { - m_assembler.subvlRegReg(src, dest); - return branchTrue(); - } - - if (cond == Signed) { - // Check if dest is negative - m_assembler.sublRegReg(src, dest); - compare32(0, dest, LessThan); - return branchTrue(); - } - - sub32(src, dest); - compare32(0, dest, static_cast<RelationalCondition>(cond)); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - Jump branchSub32(ResultCondition cond, TrustedImm32 imm, RegisterID dest) - { - ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero)); - - move(imm, scratchReg3); - return branchSub32(cond, scratchReg3, dest); - } - - Jump branchSub32(ResultCondition cond, RegisterID src, TrustedImm32 imm, RegisterID dest) - { - move(imm, scratchReg3); - if (src != dest) - move(src, 
dest); - return branchSub32(cond, scratchReg3, dest); - } - - Jump branchSub32(ResultCondition cond, RegisterID src1, RegisterID src2, RegisterID dest) - { - if (src1 != dest) - move(src1, dest); - return branchSub32(cond, src2, dest); - } - - Jump branchOr32(ResultCondition cond, RegisterID src, RegisterID dest) - { - ASSERT((cond == Signed) || (cond == Zero) || (cond == NonZero)); - - if (cond == Signed) { - or32(src, dest); - compare32(0, dest, static_cast<RelationalCondition>(LessThan)); - return branchTrue(); - } - - or32(src, dest); - compare32(0, dest, static_cast<RelationalCondition>(cond)); - - if (cond == NonZero) // NotEqual - return branchFalse(); - return branchTrue(); - } - - void branchConvertDoubleToInt32(FPRegisterID src, RegisterID dest, JumpList& failureCases, FPRegisterID fpTemp) - { - m_assembler.ftrcdrmfpul(src); - m_assembler.stsfpulReg(dest); - convertInt32ToDouble(dest, fscratch); - failureCases.append(branchDouble(DoubleNotEqualOrUnordered, fscratch, src)); - - if (dest == SH4Registers::r0) - m_assembler.cmpEqImmR0(0, dest); - else { - m_assembler.movImm8(0, scratchReg3); - m_assembler.cmplRegReg(scratchReg3, dest, SH4Condition(Equal)); - } - failureCases.append(branchTrue()); - } - - void neg32(RegisterID dst) - { - m_assembler.neg(dst, dst); - } - - void urshift32(RegisterID shiftamount, RegisterID dest) - { - if (shiftamount == SH4Registers::r0) - m_assembler.andlImm8r(0x1f, shiftamount); - else { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(0x1f, scr); - m_assembler.andlRegReg(scr, shiftamount); - releaseScratch(scr); - } - m_assembler.neg(shiftamount, shiftamount); - m_assembler.shllRegReg(dest, shiftamount); - } - - void urshift32(TrustedImm32 imm, RegisterID dest) - { - RegisterID scr = claimScratch(); - m_assembler.loadConstant(-(imm.m_value & 0x1f), scr); - m_assembler.shaRegReg(dest, scr); - releaseScratch(scr); - } - - void urshift32(RegisterID src, TrustedImm32 shiftamount, RegisterID dest) - { - if (src != 
dest) - move(src, dest); - - urshift32(shiftamount, dest); - } - - Call call() - { - return Call(m_assembler.call(), Call::Linkable); - } - - Call nearCall() - { - return Call(m_assembler.call(), Call::LinkableNear); - } - - Call call(RegisterID target) - { - return Call(m_assembler.call(target), Call::None); - } - - void call(Address address, RegisterID target) - { - load32(address.base, address.offset, target); - m_assembler.ensureSpace(m_assembler.maxInstructionSize + 2); - m_assembler.branch(JSR_OPCODE, target); - m_assembler.nop(); - } - - void breakpoint() - { - m_assembler.bkpt(); - m_assembler.nop(); - } - - Jump branchPtrWithPatch(RelationalCondition cond, RegisterID left, DataLabelPtr& dataLabel, TrustedImmPtr initialRightValue = TrustedImmPtr(0)) - { - RegisterID dataTempRegister = claimScratch(); - - dataLabel = moveWithPatch(initialRightValue, dataTempRegister); - m_assembler.cmplRegReg(dataTempRegister, left, SH4Condition(cond)); - releaseScratch(dataTempRegister); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - Jump branchPtrWithPatch(RelationalCondition cond, Address left, DataLabelPtr& dataLabel, TrustedImmPtr initialRightValue = TrustedImmPtr(0)) - { - RegisterID scr = claimScratch(); - - m_assembler.loadConstant(left.offset, scr); - m_assembler.addlRegReg(left.base, scr); - m_assembler.movlMemReg(scr, scr); - RegisterID scr1 = claimScratch(); - dataLabel = moveWithPatch(initialRightValue, scr1); - m_assembler.cmplRegReg(scr1, scr, SH4Condition(cond)); - releaseScratch(scr); - releaseScratch(scr1); - - if (cond == NotEqual) - return branchFalse(); - return branchTrue(); - } - - void ret() - { - m_assembler.ret(); - m_assembler.nop(); - } - - DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) - { - RegisterID scr = claimScratch(); - DataLabelPtr label = moveWithPatch(initialValue, scr); - store32(scr, address); - releaseScratch(scr); - return label; - } - - DataLabelPtr 
storePtrWithPatch(ImplicitAddress address) { return storePtrWithPatch(TrustedImmPtr(0), address); } - - int sizeOfConstantPool() - { - return m_assembler.sizeOfConstantPool(); - } - - Call tailRecursiveCall() - { - RegisterID scr = claimScratch(); - - m_assembler.loadConstantUnReusable(0x0, scr, true); - Jump m_jump = Jump(m_assembler.jmp(scr)); - releaseScratch(scr); - - return Call::fromTailJump(m_jump); - } - - Call makeTailRecursiveCall(Jump oldJump) - { - oldJump.link(this); - return tailRecursiveCall(); - } - - void nop() - { - m_assembler.nop(); - } - - static FunctionPtr readCallTarget(CodeLocationCall call) - { - return FunctionPtr(reinterpret_cast<void(*)()>(SH4Assembler::readCallTarget(call.dataLocation()))); - } - - static void replaceWithJump(CodeLocationLabel instructionStart, CodeLocationLabel destination) - { - RELEASE_ASSERT_NOT_REACHED(); - } - - static ptrdiff_t maxJumpReplacementSize() - { - RELEASE_ASSERT_NOT_REACHED(); - return 0; - } - - static bool canJumpReplacePatchableBranchPtrWithPatch() { return false; } - - static CodeLocationLabel startOfBranchPtrWithPatchOnRegister(CodeLocationDataLabelPtr label) - { - return label.labelAtOffset(0); - } - - static void revertJumpReplacementToBranchPtrWithPatch(CodeLocationLabel instructionStart, RegisterID, void* initialValue) - { - SH4Assembler::revertJump(instructionStart.dataLocation(), reinterpret_cast<uintptr_t>(initialValue) & 0xffff); - } - - static CodeLocationLabel startOfPatchableBranchPtrWithPatchOnAddress(CodeLocationDataLabelPtr) - { - UNREACHABLE_FOR_PLATFORM(); - return CodeLocationLabel(); - } - - static void revertJumpReplacementToPatchableBranchPtrWithPatch(CodeLocationLabel instructionStart, Address, void* initialValue) - { - UNREACHABLE_FOR_PLATFORM(); - } - -protected: - SH4Assembler::Condition SH4Condition(RelationalCondition cond) - { - return static_cast<SH4Assembler::Condition>(cond); - } - - SH4Assembler::Condition SH4Condition(ResultCondition cond) - { - return 
static_cast<SH4Assembler::Condition>(cond); - } -private: - template <typename, template <typename> class> friend class LinkBufferBase; - friend class RepatchBuffer; - - static void linkCall(void*, Call, FunctionPtr); - static void repatchCall(CodeLocationCall, CodeLocationLabel); - static void repatchCall(CodeLocationCall, FunctionPtr); -}; - -} // namespace JSC - -#endif // ENABLE(ASSEMBLER) - -#endif // MacroAssemblerSH4_h diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86.h b/src/3rdparty/masm/assembler/MacroAssemblerX86.h index 280cf427fc..e3e0bfe5e1 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerX86.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerX86.h @@ -108,6 +108,11 @@ public: m_assembler.adcl_im(imm.m_value >> 31, reinterpret_cast<const char*>(address.m_ptr) + sizeof(int32_t)); } + void getEffectiveAddress(BaseIndex address, RegisterID dest) + { + return x86Lea32(address, dest); + } + void and32(TrustedImm32 imm, AbsoluteAddress address) { m_assembler.andl_im(imm.m_value, address.m_ptr); diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86Common.h b/src/3rdparty/masm/assembler/MacroAssemblerX86Common.h index 94771be6a7..769b4346ee 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerX86Common.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerX86Common.h @@ -146,14 +146,24 @@ public: m_assembler.andl_rr(src, dest); } - void add32(RegisterID op1, RegisterID op2, RegisterID dest) + void add32(RegisterID a, RegisterID b, RegisterID dest) { - if (op2 == dest) { - add32(op1, dest); - } else { - move(op1, dest); - add32(op2, dest); + x86Lea32(BaseIndex(a, b, TimesOne), dest); + } + + void x86Lea32(BaseIndex index, RegisterID dest) + { + if (!index.scale && !index.offset) { + if (index.base == dest) { + add32(index.index, dest); + return; + } + if (index.index == dest) { + add32(index.base, dest); + return; + } } + m_assembler.leal_mr(index.offset, index.base, index.index, index.scale, dest); } void and32(TrustedImm32 imm, RegisterID 
dest) @@ -501,6 +511,11 @@ public: load32(address, dest); } + void load16Unaligned(ImplicitAddress address, RegisterID dest) + { + load16(address, dest); + } + void load16Unaligned(BaseIndex address, RegisterID dest) { load16(address, dest); @@ -558,6 +573,11 @@ public: m_assembler.movzwl_mr(address.offset, address.base, address.index, address.scale, dest); } + void load16(ImplicitAddress address, RegisterID dest) + { + m_assembler.movzwl_mr(address.offset, address.base, dest); + } + void load16(Address address, RegisterID dest) { m_assembler.movzwl_mr(address.offset, address.base, dest); diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h index 002caaae78..f4349e1f93 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h @@ -243,6 +243,26 @@ public: add64(imm, Address(scratchRegister)); } + void x86Lea64(BaseIndex index, RegisterID dest) + { + if (!index.scale && !index.offset) { + if (index.base == dest) { + add64(index.index, dest); + return; + } + if (index.index == dest) { + add64(index.base, dest); + return; + } + } + m_assembler.leaq_mr(index.offset, index.base, index.index, index.scale, dest); + } + + void getEffectiveAddress(BaseIndex address, RegisterID dest) + { + return x86Lea64(address, dest); + } + void and64(RegisterID src, RegisterID dest) { m_assembler.andq_rr(src, dest); diff --git a/src/3rdparty/masm/assembler/SH4Assembler.h b/src/3rdparty/masm/assembler/SH4Assembler.h deleted file mode 100644 index b7a166ea99..0000000000 --- a/src/3rdparty/masm/assembler/SH4Assembler.h +++ /dev/null @@ -1,2152 +0,0 @@ -/* - * Copyright (C) 2009-2011 STMicroelectronics. All rights reserved. - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef SH4Assembler_h -#define SH4Assembler_h - -#if ENABLE(ASSEMBLER) && CPU(SH4) - -#include "AssemblerBuffer.h" -#include "AssemblerBufferWithConstantPool.h" -#include "JITCompilationEffort.h" -#include <stdarg.h> -#include <stdint.h> -#include <stdio.h> -#include <wtf/Assertions.h> -#include <wtf/DataLog.h> -#include <wtf/Vector.h> - -#ifndef NDEBUG -#define SH4_ASSEMBLER_TRACING -#endif - -namespace JSC { -typedef uint16_t SH4Word; - -enum { - INVALID_OPCODE = 0xffff, - ADD_OPCODE = 0x300c, - ADDIMM_OPCODE = 0x7000, - ADDC_OPCODE = 0x300e, - ADDV_OPCODE = 0x300f, - AND_OPCODE = 0x2009, - ANDIMM_OPCODE = 0xc900, - DIV0_OPCODE = 0x2007, - DIV1_OPCODE = 0x3004, - BF_OPCODE = 0x8b00, - BFS_OPCODE = 0x8f00, - BRA_OPCODE = 0xa000, - BRAF_OPCODE = 0x0023, - NOP_OPCODE = 0x0009, - BSR_OPCODE = 0xb000, - RTS_OPCODE = 0x000b, - BT_OPCODE = 0x8900, - BTS_OPCODE = 0x8d00, - BSRF_OPCODE = 0x0003, - BRK_OPCODE = 0x003b, - FTRC_OPCODE = 0xf03d, - CMPEQ_OPCODE = 0x3000, - CMPEQIMM_OPCODE = 0x8800, - CMPGE_OPCODE = 0x3003, - CMPGT_OPCODE = 0x3007, - CMPHI_OPCODE = 0x3006, - CMPHS_OPCODE = 0x3002, - CMPPL_OPCODE = 0x4015, - CMPPZ_OPCODE = 0x4011, - CMPSTR_OPCODE = 0x200c, - DT_OPCODE = 0x4010, - FCMPEQ_OPCODE = 0xf004, - FCMPGT_OPCODE = 0xf005, - FMOV_OPCODE = 0xf00c, - FADD_OPCODE = 0xf000, - FMUL_OPCODE = 0xf002, - FSUB_OPCODE = 0xf001, - FDIV_OPCODE = 0xf003, - FNEG_OPCODE = 0xf04d, - JMP_OPCODE = 0x402b, - JSR_OPCODE = 0x400b, - LDSPR_OPCODE = 0x402a, - LDSLPR_OPCODE = 0x4026, - MOV_OPCODE = 0x6003, - MOVIMM_OPCODE = 0xe000, - MOVB_WRITE_RN_OPCODE = 0x2000, - MOVB_WRITE_RNDEC_OPCODE = 0x2004, - MOVB_WRITE_R0RN_OPCODE = 0x0004, - MOVB_WRITE_OFFGBR_OPCODE = 0xc000, - MOVB_WRITE_OFFRN_OPCODE = 0x8000, - MOVB_READ_RM_OPCODE = 0x6000, - MOVB_READ_RMINC_OPCODE = 0x6004, - MOVB_READ_R0RM_OPCODE = 0x000c, - MOVB_READ_OFFGBR_OPCODE = 0xc400, - MOVB_READ_OFFRM_OPCODE = 0x8400, - MOVL_WRITE_RN_OPCODE = 0x2002, - MOVL_WRITE_RNDEC_OPCODE = 0x2006, - MOVL_WRITE_R0RN_OPCODE = 
0x0006, - MOVL_WRITE_OFFGBR_OPCODE = 0xc200, - MOVL_WRITE_OFFRN_OPCODE = 0x1000, - MOVL_READ_RM_OPCODE = 0x6002, - MOVL_READ_RMINC_OPCODE = 0x6006, - MOVL_READ_R0RM_OPCODE = 0x000e, - MOVL_READ_OFFGBR_OPCODE = 0xc600, - MOVL_READ_OFFPC_OPCODE = 0xd000, - MOVL_READ_OFFRM_OPCODE = 0x5000, - MOVW_WRITE_RN_OPCODE = 0x2001, - MOVW_READ_RM_OPCODE = 0x6001, - MOVW_READ_R0RM_OPCODE = 0x000d, - MOVW_READ_OFFRM_OPCODE = 0x8500, - MOVW_READ_OFFPC_OPCODE = 0x9000, - MOVA_READ_OFFPC_OPCODE = 0xc700, - MOVT_OPCODE = 0x0029, - MULL_OPCODE = 0x0007, - DMULL_L_OPCODE = 0x3005, - STSMACL_OPCODE = 0x001a, - STSMACH_OPCODE = 0x000a, - DMULSL_OPCODE = 0x300d, - NEG_OPCODE = 0x600b, - NEGC_OPCODE = 0x600a, - NOT_OPCODE = 0x6007, - OR_OPCODE = 0x200b, - ORIMM_OPCODE = 0xcb00, - ORBIMM_OPCODE = 0xcf00, - SETS_OPCODE = 0x0058, - SETT_OPCODE = 0x0018, - SHAD_OPCODE = 0x400c, - SHAL_OPCODE = 0x4020, - SHAR_OPCODE = 0x4021, - SHLD_OPCODE = 0x400d, - SHLL_OPCODE = 0x4000, - SHLL2_OPCODE = 0x4008, - SHLL8_OPCODE = 0x4018, - SHLL16_OPCODE = 0x4028, - SHLR_OPCODE = 0x4001, - SHLR2_OPCODE = 0x4009, - SHLR8_OPCODE = 0x4019, - SHLR16_OPCODE = 0x4029, - STSPR_OPCODE = 0x002a, - STSLPR_OPCODE = 0x4022, - FLOAT_OPCODE = 0xf02d, - SUB_OPCODE = 0x3008, - SUBC_OPCODE = 0x300a, - SUBV_OPCODE = 0x300b, - TST_OPCODE = 0x2008, - TSTIMM_OPCODE = 0xc800, - TSTB_OPCODE = 0xcc00, - EXTUB_OPCODE = 0x600c, - EXTUW_OPCODE = 0x600d, - XOR_OPCODE = 0x200a, - XORIMM_OPCODE = 0xca00, - XORB_OPCODE = 0xce00, - FMOVS_READ_RM_INC_OPCODE = 0xf009, - FMOVS_READ_RM_OPCODE = 0xf008, - FMOVS_READ_R0RM_OPCODE = 0xf006, - FMOVS_WRITE_RN_OPCODE = 0xf00a, - FMOVS_WRITE_RN_DEC_OPCODE = 0xf00b, - FMOVS_WRITE_R0RN_OPCODE = 0xf007, - FCNVDS_DRM_FPUL_OPCODE = 0xf0bd, - FCNVSD_FPUL_DRN_OPCODE = 0xf0ad, - LDS_RM_FPUL_OPCODE = 0x405a, - FLDS_FRM_FPUL_OPCODE = 0xf01d, - STS_FPUL_RN_OPCODE = 0x005a, - FSTS_FPUL_FRN_OPCODE = 0xF00d, - LDSFPSCR_OPCODE = 0x406a, - STSFPSCR_OPCODE = 0x006a, - LDSRMFPUL_OPCODE = 0x405a, - FSTSFPULFRN_OPCODE = 
0xf00d, - FSQRT_OPCODE = 0xf06d, - FSCHG_OPCODE = 0xf3fd, - CLRT_OPCODE = 8, -}; - -namespace SH4Registers { -typedef enum { - r0, - r1, - r2, - r3, - r4, - r5, - r6, - r7, - r8, - r9, - r10, - r11, - r12, - r13, - r14, fp = r14, - r15, sp = r15, - pc, - pr, -} RegisterID; - -typedef enum { - fr0, dr0 = fr0, - fr1, - fr2, dr2 = fr2, - fr3, - fr4, dr4 = fr4, - fr5, - fr6, dr6 = fr6, - fr7, - fr8, dr8 = fr8, - fr9, - fr10, dr10 = fr10, - fr11, - fr12, dr12 = fr12, - fr13, - fr14, dr14 = fr14, - fr15, -} FPRegisterID; -} - -inline uint16_t getOpcodeGroup1(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0xf) << 8) | ((rn & 0xf) << 4)); -} - -inline uint16_t getOpcodeGroup2(uint16_t opc, int rm) -{ - return (opc | ((rm & 0xf) << 8)); -} - -inline uint16_t getOpcodeGroup3(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0xf) << 8) | (rn & 0xff)); -} - -inline uint16_t getOpcodeGroup4(uint16_t opc, int rm, int rn, int offset) -{ - return (opc | ((rm & 0xf) << 8) | ((rn & 0xf) << 4) | (offset & 0xf)); -} - -inline uint16_t getOpcodeGroup5(uint16_t opc, int rm) -{ - return (opc | (rm & 0xff)); -} - -inline uint16_t getOpcodeGroup6(uint16_t opc, int rm) -{ - return (opc | (rm & 0xfff)); -} - -inline uint16_t getOpcodeGroup7(uint16_t opc, int rm) -{ - return (opc | ((rm & 0x7) << 9)); -} - -inline uint16_t getOpcodeGroup8(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0x7) << 9) | ((rn & 0x7) << 5)); -} - -inline uint16_t getOpcodeGroup9(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0xf) << 8) | ((rn & 0x7) << 5)); -} - -inline uint16_t getOpcodeGroup10(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0x7) << 9) | ((rn & 0xf) << 4)); -} - -inline uint16_t getOpcodeGroup11(uint16_t opc, int rm, int rn) -{ - return (opc | ((rm & 0xf) << 4) | (rn & 0xf)); -} - -inline uint16_t getRn(uint16_t x) -{ - return ((x & 0xf00) >> 8); -} - -inline uint16_t getRm(uint16_t x) -{ - return ((x & 0xf0) >> 4); -} - -inline uint16_t getDisp(uint16_t x) 
-{ - return (x & 0xf); -} - -inline uint16_t getImm8(uint16_t x) -{ - return (x & 0xff); -} - -inline uint16_t getImm12(uint16_t x) -{ - return (x & 0xfff); -} - -inline uint16_t getDRn(uint16_t x) -{ - return ((x & 0xe00) >> 9); -} - -inline uint16_t getDRm(uint16_t x) -{ - return ((x & 0xe0) >> 5); -} - -class SH4Assembler { -public: - typedef SH4Registers::RegisterID RegisterID; - typedef SH4Registers::FPRegisterID FPRegisterID; - typedef AssemblerBufferWithConstantPool<512, 4, 2, SH4Assembler> SH4Buffer; - static const RegisterID scratchReg1 = SH4Registers::r3; - static const RegisterID scratchReg2 = SH4Registers::r11; - static const uint32_t maxInstructionSize = 16; - - enum { - padForAlign8 = 0x00, - padForAlign16 = 0x0009, - padForAlign32 = 0x00090009, - }; - - enum JumpType { - JumpFar, - JumpNear - }; - - SH4Assembler() - { - m_claimscratchReg = 0x0; - } - - // SH4 condition codes - typedef enum { - EQ = 0x0, // Equal - NE = 0x1, // Not Equal - HS = 0x2, // Unsigend Greater Than equal - HI = 0x3, // Unsigend Greater Than - LS = 0x4, // Unsigend Lower or Same - LI = 0x5, // Unsigend Lower - GE = 0x6, // Greater or Equal - LT = 0x7, // Less Than - GT = 0x8, // Greater Than - LE = 0x9, // Less or Equal - OF = 0xa, // OverFlow - SI = 0xb, // Signed - EQU= 0xc, // Equal or unordered(NaN) - NEU= 0xd, - GTU= 0xe, - GEU= 0xf, - LTU= 0x10, - LEU= 0x11, - } Condition; - - // Opaque label types -public: - bool isImmediate(int constant) - { - return ((constant <= 127) && (constant >= -128)); - } - - RegisterID claimScratch() - { - ASSERT((m_claimscratchReg != 0x3)); - - if (!(m_claimscratchReg & 0x1)) { - m_claimscratchReg = (m_claimscratchReg | 0x1); - return scratchReg1; - } - - m_claimscratchReg = (m_claimscratchReg | 0x2); - return scratchReg2; - } - - void releaseScratch(RegisterID scratchR) - { - if (scratchR == scratchReg1) - m_claimscratchReg = (m_claimscratchReg & 0x2); - else - m_claimscratchReg = (m_claimscratchReg & 0x1); - } - - // Stack operations - - 
void pushReg(RegisterID reg) - { - if (reg == SH4Registers::pr) { - oneShortOp(getOpcodeGroup2(STSLPR_OPCODE, SH4Registers::sp)); - return; - } - - oneShortOp(getOpcodeGroup1(MOVL_WRITE_RNDEC_OPCODE, SH4Registers::sp, reg)); - } - - void popReg(RegisterID reg) - { - if (reg == SH4Registers::pr) { - oneShortOp(getOpcodeGroup2(LDSLPR_OPCODE, SH4Registers::sp)); - return; - } - - oneShortOp(getOpcodeGroup1(MOVL_READ_RMINC_OPCODE, reg, SH4Registers::sp)); - } - - void movt(RegisterID dst) - { - uint16_t opc = getOpcodeGroup2(MOVT_OPCODE, dst); - oneShortOp(opc); - } - - // Arithmetic operations - - void addlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(ADD_OPCODE, dst, src); - oneShortOp(opc); - } - - void addclRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(ADDC_OPCODE, dst, src); - oneShortOp(opc); - } - - void addvlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(ADDV_OPCODE, dst, src); - oneShortOp(opc); - } - - void addlImm8r(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 127) && (imm8 >= -128)); - - uint16_t opc = getOpcodeGroup3(ADDIMM_OPCODE, dst, imm8); - oneShortOp(opc); - } - - void andlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(AND_OPCODE, dst, src); - oneShortOp(opc); - } - - void andlImm8r(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 255) && (imm8 >= 0)); - ASSERT(dst == SH4Registers::r0); - - uint16_t opc = getOpcodeGroup5(ANDIMM_OPCODE, imm8); - oneShortOp(opc); - } - - void div1lRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(DIV1_OPCODE, dst, src); - oneShortOp(opc); - } - - void div0lRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(DIV0_OPCODE, dst, src); - oneShortOp(opc); - } - - void notlReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(NOT_OPCODE, dst, src); - oneShortOp(opc); - } - - void orlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc 
= getOpcodeGroup1(OR_OPCODE, dst, src); - oneShortOp(opc); - } - - void orlImm8r(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 255) && (imm8 >= 0)); - ASSERT(dst == SH4Registers::r0); - - uint16_t opc = getOpcodeGroup5(ORIMM_OPCODE, imm8); - oneShortOp(opc); - } - - void sublRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(SUB_OPCODE, dst, src); - oneShortOp(opc); - } - - void subvlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(SUBV_OPCODE, dst, src); - oneShortOp(opc); - } - - void xorlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(XOR_OPCODE, dst, src); - oneShortOp(opc); - } - - void xorlImm8r(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 255) && (imm8 >= 0)); - ASSERT(dst == SH4Registers::r0); - - uint16_t opc = getOpcodeGroup5(XORIMM_OPCODE, imm8); - oneShortOp(opc); - } - - void shllImm8r(int imm, RegisterID dst) - { - switch (imm) { - case 1: - oneShortOp(getOpcodeGroup2(SHLL_OPCODE, dst)); - break; - case 2: - oneShortOp(getOpcodeGroup2(SHLL2_OPCODE, dst)); - break; - case 8: - oneShortOp(getOpcodeGroup2(SHLL8_OPCODE, dst)); - break; - case 16: - oneShortOp(getOpcodeGroup2(SHLL16_OPCODE, dst)); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - } - } - - void neg(RegisterID dst, RegisterID src) - { - uint16_t opc = getOpcodeGroup1(NEG_OPCODE, dst, src); - oneShortOp(opc); - } - - void shllRegReg(RegisterID dst, RegisterID rShift) - { - uint16_t opc = getOpcodeGroup1(SHLD_OPCODE, dst, rShift); - oneShortOp(opc); - } - - void shlrRegReg(RegisterID dst, RegisterID rShift) - { - neg(rShift, rShift); - shllRegReg(dst, rShift); - } - - void sharRegReg(RegisterID dst, RegisterID rShift) - { - neg(rShift, rShift); - shaRegReg(dst, rShift); - } - - void shaRegReg(RegisterID dst, RegisterID rShift) - { - uint16_t opc = getOpcodeGroup1(SHAD_OPCODE, dst, rShift); - oneShortOp(opc); - } - - void shlrImm8r(int imm, RegisterID dst) - { - switch (imm) { - case 1: - 
oneShortOp(getOpcodeGroup2(SHLR_OPCODE, dst)); - break; - case 2: - oneShortOp(getOpcodeGroup2(SHLR2_OPCODE, dst)); - break; - case 8: - oneShortOp(getOpcodeGroup2(SHLR8_OPCODE, dst)); - break; - case 16: - oneShortOp(getOpcodeGroup2(SHLR16_OPCODE, dst)); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - } - } - - void imullRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MULL_OPCODE, dst, src); - oneShortOp(opc); - } - - void dmullRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(DMULL_L_OPCODE, dst, src); - oneShortOp(opc); - } - - void dmulslRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(DMULSL_OPCODE, dst, src); - oneShortOp(opc); - } - - void stsmacl(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(STSMACL_OPCODE, reg); - oneShortOp(opc); - } - - void stsmach(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(STSMACH_OPCODE, reg); - oneShortOp(opc); - } - - // Comparisons - - void cmplRegReg(RegisterID left, RegisterID right, Condition cond) - { - switch (cond) { - case NE: - oneShortOp(getOpcodeGroup1(CMPEQ_OPCODE, right, left)); - break; - case GT: - oneShortOp(getOpcodeGroup1(CMPGT_OPCODE, right, left)); - break; - case EQ: - oneShortOp(getOpcodeGroup1(CMPEQ_OPCODE, right, left)); - break; - case GE: - oneShortOp(getOpcodeGroup1(CMPGE_OPCODE, right, left)); - break; - case HS: - oneShortOp(getOpcodeGroup1(CMPHS_OPCODE, right, left)); - break; - case HI: - oneShortOp(getOpcodeGroup1(CMPHI_OPCODE, right, left)); - break; - case LI: - oneShortOp(getOpcodeGroup1(CMPHI_OPCODE, left, right)); - break; - case LS: - oneShortOp(getOpcodeGroup1(CMPHS_OPCODE, left, right)); - break; - case LE: - oneShortOp(getOpcodeGroup1(CMPGE_OPCODE, left, right)); - break; - case LT: - oneShortOp(getOpcodeGroup1(CMPGT_OPCODE, left, right)); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - } - } - - void cmppl(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(CMPPL_OPCODE, reg); - 
oneShortOp(opc); - } - - void cmppz(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(CMPPZ_OPCODE, reg); - oneShortOp(opc); - } - - void cmpEqImmR0(int imm, RegisterID dst) - { - uint16_t opc = getOpcodeGroup5(CMPEQIMM_OPCODE, imm); - oneShortOp(opc); - } - - void testlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(TST_OPCODE, dst, src); - oneShortOp(opc); - } - - void testlImm8r(int imm, RegisterID dst) - { - ASSERT((dst == SH4Registers::r0) && (imm <= 255) && (imm >= 0)); - - uint16_t opc = getOpcodeGroup5(TSTIMM_OPCODE, imm); - oneShortOp(opc); - } - - void nop() - { - oneShortOp(NOP_OPCODE, false); - } - - void sett() - { - oneShortOp(SETT_OPCODE); - } - - void clrt() - { - oneShortOp(CLRT_OPCODE); - } - - void fschg() - { - oneShortOp(FSCHG_OPCODE); - } - - void bkpt() - { - oneShortOp(BRK_OPCODE, false); - } - - void branch(uint16_t opc, int label) - { - switch (opc) { - case BT_OPCODE: - ASSERT((label <= 127) && (label >= -128)); - oneShortOp(getOpcodeGroup5(BT_OPCODE, label)); - break; - case BRA_OPCODE: - ASSERT((label <= 2047) && (label >= -2048)); - oneShortOp(getOpcodeGroup6(BRA_OPCODE, label)); - break; - case BF_OPCODE: - ASSERT((label <= 127) && (label >= -128)); - oneShortOp(getOpcodeGroup5(BF_OPCODE, label)); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - } - } - - void branch(uint16_t opc, RegisterID reg) - { - switch (opc) { - case BRAF_OPCODE: - oneShortOp(getOpcodeGroup2(BRAF_OPCODE, reg)); - break; - case JMP_OPCODE: - oneShortOp(getOpcodeGroup2(JMP_OPCODE, reg)); - break; - case JSR_OPCODE: - oneShortOp(getOpcodeGroup2(JSR_OPCODE, reg)); - break; - case BSRF_OPCODE: - oneShortOp(getOpcodeGroup2(BSRF_OPCODE, reg)); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - } - } - - void ldspr(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(LDSPR_OPCODE, reg); - oneShortOp(opc); - } - - void stspr(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(STSPR_OPCODE, reg); - oneShortOp(opc); - } - - void 
extub(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(EXTUB_OPCODE, dst, src); - oneShortOp(opc); - } - - void extuw(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(EXTUW_OPCODE, dst, src); - oneShortOp(opc); - } - - // float operations - - void ldsrmfpul(RegisterID src) - { - uint16_t opc = getOpcodeGroup2(LDS_RM_FPUL_OPCODE, src); - oneShortOp(opc); - } - - void fneg(FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup2(FNEG_OPCODE, dst); - oneShortOp(opc, true, false); - } - - void fsqrt(FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup2(FSQRT_OPCODE, dst); - oneShortOp(opc, true, false); - } - - void stsfpulReg(RegisterID src) - { - uint16_t opc = getOpcodeGroup2(STS_FPUL_RN_OPCODE, src); - oneShortOp(opc); - } - - void floatfpulfrn(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup2(FLOAT_OPCODE, src); - oneShortOp(opc, true, false); - } - - void fmull(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMUL_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsReadrm(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_READ_RM_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsWriterm(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_WRITE_RN_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsWriter0r(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_WRITE_R0RN_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsReadr0r(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_READ_R0RM_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsReadrminc(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_READ_RM_INC_OPCODE, dst, src); - oneShortOp(opc, true, false); - } - - void fmovsWriterndec(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(FMOVS_WRITE_RN_DEC_OPCODE, 
dst, src); - oneShortOp(opc, true, false); - } - - void ftrcRegfpul(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup2(FTRC_OPCODE, src); - oneShortOp(opc, true, false); - } - - void fldsfpul(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup2(FLDS_FRM_FPUL_OPCODE, src); - oneShortOp(opc); - } - - void fstsfpul(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup2(FSTS_FPUL_FRN_OPCODE, src); - oneShortOp(opc); - } - - void ldsfpscr(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(LDSFPSCR_OPCODE, reg); - oneShortOp(opc); - } - - void stsfpscr(RegisterID reg) - { - uint16_t opc = getOpcodeGroup2(STSFPSCR_OPCODE, reg); - oneShortOp(opc); - } - - // double operations - - void dcnvds(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup7(FCNVDS_DRM_FPUL_OPCODE, src >> 1); - oneShortOp(opc); - } - - void dcnvsd(FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup7(FCNVSD_FPUL_DRN_OPCODE, dst >> 1); - oneShortOp(opc); - } - - void dcmppeq(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FCMPEQ_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void dcmppgt(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FCMPGT_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void dmulRegReg(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FMUL_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void dsubRegReg(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FSUB_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void daddRegReg(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FADD_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void dmovRegReg(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FMOV_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void ddivRegReg(FPRegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup8(FDIV_OPCODE, dst >> 1, src >> 1); - oneShortOp(opc); - } - - void 
dsqrt(FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup7(FSQRT_OPCODE, dst >> 1); - oneShortOp(opc); - } - - void dneg(FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup7(FNEG_OPCODE, dst >> 1); - oneShortOp(opc); - } - - void fmovReadrm(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup10(FMOVS_READ_RM_OPCODE, dst >> 1, src); - oneShortOp(opc); - } - - void fmovWriterm(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup9(FMOVS_WRITE_RN_OPCODE, dst, src >> 1); - oneShortOp(opc); - } - - void fmovWriter0r(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup9(FMOVS_WRITE_R0RN_OPCODE, dst, src >> 1); - oneShortOp(opc); - } - - void fmovReadr0r(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup10(FMOVS_READ_R0RM_OPCODE, dst >> 1, src); - oneShortOp(opc); - } - - void fmovReadrminc(RegisterID src, FPRegisterID dst) - { - uint16_t opc = getOpcodeGroup10(FMOVS_READ_RM_INC_OPCODE, dst >> 1, src); - oneShortOp(opc); - } - - void fmovWriterndec(FPRegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup9(FMOVS_WRITE_RN_DEC_OPCODE, dst, src >> 1); - oneShortOp(opc); - } - - void floatfpulDreg(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup7(FLOAT_OPCODE, src >> 1); - oneShortOp(opc); - } - - void ftrcdrmfpul(FPRegisterID src) - { - uint16_t opc = getOpcodeGroup7(FTRC_OPCODE, src >> 1); - oneShortOp(opc); - } - - // Various move ops - - void movImm8(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 127) && (imm8 >= -128)); - - uint16_t opc = getOpcodeGroup3(MOVIMM_OPCODE, dst, imm8); - oneShortOp(opc); - } - - void movlRegReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOV_OPCODE, dst, src); - oneShortOp(opc); - } - - void movwRegMem(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVW_WRITE_RN_OPCODE, dst, src); - oneShortOp(opc); - } - - void movwMemReg(RegisterID src, RegisterID dst) - { - uint16_t opc = 
getOpcodeGroup1(MOVW_READ_RM_OPCODE, dst, src); - oneShortOp(opc); - } - - void movwPCReg(int offset, RegisterID base, RegisterID dst) - { - ASSERT(base == SH4Registers::pc); - ASSERT((offset <= 255) && (offset >= 0)); - - uint16_t opc = getOpcodeGroup3(MOVW_READ_OFFPC_OPCODE, dst, offset); - oneShortOp(opc); - } - - void movwMemReg(int offset, RegisterID base, RegisterID dst) - { - ASSERT(dst == SH4Registers::r0); - - uint16_t opc = getOpcodeGroup11(MOVW_READ_OFFRM_OPCODE, base, offset); - oneShortOp(opc); - } - - void movwR0mr(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVW_READ_R0RM_OPCODE, dst, src); - oneShortOp(opc); - } - - void movlRegMem(RegisterID src, int offset, RegisterID base) - { - ASSERT((offset <= 15) && (offset >= 0)); - - if (!offset) { - oneShortOp(getOpcodeGroup1(MOVL_WRITE_RN_OPCODE, base, src)); - return; - } - - oneShortOp(getOpcodeGroup4(MOVL_WRITE_OFFRN_OPCODE, base, src, offset)); - } - - void movlRegMem(RegisterID src, RegisterID base) - { - uint16_t opc = getOpcodeGroup1(MOVL_WRITE_RN_OPCODE, base, src); - oneShortOp(opc); - } - - void movlMemReg(int offset, RegisterID base, RegisterID dst) - { - if (base == SH4Registers::pc) { - ASSERT((offset <= 255) && (offset >= 0)); - oneShortOp(getOpcodeGroup3(MOVL_READ_OFFPC_OPCODE, dst, offset)); - return; - } - - ASSERT((offset <= 15) && (offset >= 0)); - if (!offset) { - oneShortOp(getOpcodeGroup1(MOVL_READ_RM_OPCODE, dst, base)); - return; - } - - oneShortOp(getOpcodeGroup4(MOVL_READ_OFFRM_OPCODE, dst, base, offset)); - } - - void movlMemRegCompact(int offset, RegisterID base, RegisterID dst) - { - oneShortOp(getOpcodeGroup4(MOVL_READ_OFFRM_OPCODE, dst, base, offset)); - } - - void movbRegMem(RegisterID src, RegisterID base) - { - uint16_t opc = getOpcodeGroup1(MOVB_WRITE_RN_OPCODE, base, src); - oneShortOp(opc); - } - - void movbMemReg(int offset, RegisterID base, RegisterID dst) - { - ASSERT(dst == SH4Registers::r0); - - uint16_t opc = 
getOpcodeGroup11(MOVB_READ_OFFRM_OPCODE, base, offset); - oneShortOp(opc); - } - - void movbR0mr(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVB_READ_R0RM_OPCODE, dst, src); - oneShortOp(opc); - } - - void movbMemReg(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVB_READ_RM_OPCODE, dst, src); - oneShortOp(opc); - } - - void movlMemReg(RegisterID base, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVL_READ_RM_OPCODE, dst, base); - oneShortOp(opc); - } - - void movlMemRegIn(RegisterID base, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVL_READ_RMINC_OPCODE, dst, base); - oneShortOp(opc); - } - - void movlR0mr(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVL_READ_R0RM_OPCODE, dst, src); - oneShortOp(opc); - } - - void movlRegMemr0(RegisterID src, RegisterID dst) - { - uint16_t opc = getOpcodeGroup1(MOVL_WRITE_R0RN_OPCODE, dst, src); - oneShortOp(opc); - } - - void movlImm8r(int imm8, RegisterID dst) - { - ASSERT((imm8 <= 127) && (imm8 >= -128)); - - uint16_t opc = getOpcodeGroup3(MOVIMM_OPCODE, dst, imm8); - oneShortOp(opc); - } - - void loadConstant(uint32_t constant, RegisterID dst) - { - if (((int)constant <= 0x7f) && ((int)constant >= -0x80)) { - movImm8(constant, dst); - return; - } - - uint16_t opc = getOpcodeGroup3(MOVIMM_OPCODE, dst, 0); - - m_buffer.ensureSpace(maxInstructionSize, sizeof(uint32_t)); - printInstr(getOpcodeGroup3(MOVIMM_OPCODE, dst, constant), m_buffer.codeSize()); - m_buffer.putShortWithConstantInt(opc, constant, true); - } - - void loadConstantUnReusable(uint32_t constant, RegisterID dst, bool ensureSpace = false) - { - uint16_t opc = getOpcodeGroup3(MOVIMM_OPCODE, dst, 0); - - if (ensureSpace) - m_buffer.ensureSpace(maxInstructionSize, sizeof(uint32_t)); - - printInstr(getOpcodeGroup3(MOVIMM_OPCODE, dst, constant), m_buffer.codeSize()); - m_buffer.putShortWithConstantInt(opc, constant); - } - - // Flow control - - AssemblerLabel call() - { - 
RegisterID scr = claimScratch(); - m_buffer.ensureSpace(maxInstructionSize + 4, sizeof(uint32_t)); - loadConstantUnReusable(0x0, scr); - branch(JSR_OPCODE, scr); - nop(); - releaseScratch(scr); - return m_buffer.label(); - } - - AssemblerLabel call(RegisterID dst) - { - m_buffer.ensureSpace(maxInstructionSize + 2); - branch(JSR_OPCODE, dst); - nop(); - return m_buffer.label(); - } - - AssemblerLabel jmp() - { - RegisterID scr = claimScratch(); - m_buffer.ensureSpace(maxInstructionSize + 4, sizeof(uint32_t)); - AssemblerLabel label = m_buffer.label(); - loadConstantUnReusable(0x0, scr); - branch(BRAF_OPCODE, scr); - nop(); - releaseScratch(scr); - return label; - } - - void extraInstrForBranch(RegisterID dst) - { - loadConstantUnReusable(0x0, dst); - nop(); - nop(); - } - - AssemblerLabel jmp(RegisterID dst) - { - jmpReg(dst); - return m_buffer.label(); - } - - void jmpReg(RegisterID dst) - { - m_buffer.ensureSpace(maxInstructionSize + 2); - branch(JMP_OPCODE, dst); - nop(); - } - - AssemblerLabel jne() - { - AssemblerLabel label = m_buffer.label(); - branch(BF_OPCODE, 0); - return label; - } - - AssemblerLabel je() - { - AssemblerLabel label = m_buffer.label(); - branch(BT_OPCODE, 0); - return label; - } - - AssemblerLabel bra() - { - AssemblerLabel label = m_buffer.label(); - branch(BRA_OPCODE, 0); - return label; - } - - void ret() - { - m_buffer.ensureSpace(maxInstructionSize + 2); - oneShortOp(RTS_OPCODE, false); - } - - AssemblerLabel labelIgnoringWatchpoints() - { - m_buffer.ensureSpaceForAnyInstruction(); - return m_buffer.label(); - } - - AssemblerLabel label() - { - m_buffer.ensureSpaceForAnyInstruction(); - return m_buffer.label(); - } - - int sizeOfConstantPool() - { - return m_buffer.sizeOfConstantPool(); - } - - AssemblerLabel align(int alignment) - { - m_buffer.ensureSpace(maxInstructionSize + 2); - while (!m_buffer.isAligned(alignment)) { - nop(); - m_buffer.ensureSpace(maxInstructionSize + 2); - } - return label(); - } - - static void 
changePCrelativeAddress(int offset, uint16_t* instructionPtr, uint32_t newAddress) - { - uint32_t address = (offset << 2) + ((reinterpret_cast<uint32_t>(instructionPtr) + 4) &(~0x3)); - *reinterpret_cast<uint32_t*>(address) = newAddress; - } - - static uint32_t readPCrelativeAddress(int offset, uint16_t* instructionPtr) - { - uint32_t address = (offset << 2) + ((reinterpret_cast<uint32_t>(instructionPtr) + 4) &(~0x3)); - return *reinterpret_cast<uint32_t*>(address); - } - - static uint16_t* getInstructionPtr(void* code, int offset) - { - return reinterpret_cast<uint16_t*> (reinterpret_cast<uint32_t>(code) + offset); - } - - static void linkJump(void* code, AssemblerLabel from, void* to) - { - ASSERT(from.isSet()); - - uint16_t* instructionPtr = getInstructionPtr(code, from.m_offset); - uint16_t instruction = *instructionPtr; - int offsetBits = (reinterpret_cast<uint32_t>(to) - reinterpret_cast<uint32_t>(code)) - from.m_offset; - - if (((instruction & 0xff00) == BT_OPCODE) || ((instruction & 0xff00) == BF_OPCODE)) { - /* BT label ==> BF 2 - nop LDR reg - nop braf @reg - nop nop - */ - offsetBits -= 8; - instruction ^= 0x0202; - *instructionPtr++ = instruction; - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits); - instruction = (BRAF_OPCODE | (*instructionPtr++ & 0xf00)); - *instructionPtr = instruction; - printBlockInstr(instructionPtr - 2, from.m_offset, 3); - return; - } - - /* MOV #imm, reg => LDR reg - braf @reg braf @reg - nop nop - */ - ASSERT((*(instructionPtr + 1) & BRAF_OPCODE) == BRAF_OPCODE); - - offsetBits -= 4; - if (offsetBits >= -4096 && offsetBits <= 4094) { - *instructionPtr = getOpcodeGroup6(BRA_OPCODE, offsetBits >> 1); - *(++instructionPtr) = NOP_OPCODE; - printBlockInstr(instructionPtr - 1, from.m_offset, 2); - return; - } - - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits - 2); - printInstr(*instructionPtr, from.m_offset + 2); - } - - static void linkCall(void* code, AssemblerLabel 
from, void* to) - { - uint16_t* instructionPtr = getInstructionPtr(code, from.m_offset); - instructionPtr -= 3; - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, reinterpret_cast<uint32_t>(to)); - } - - static void linkPointer(void* code, AssemblerLabel where, void* value) - { - uint16_t* instructionPtr = getInstructionPtr(code, where.m_offset); - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, reinterpret_cast<uint32_t>(value)); - } - - static unsigned getCallReturnOffset(AssemblerLabel call) - { - ASSERT(call.isSet()); - return call.m_offset; - } - - static uint32_t* getLdrImmAddressOnPool(SH4Word* insn, uint32_t* constPool) - { - return (constPool + (*insn & 0xff)); - } - - static SH4Word patchConstantPoolLoad(SH4Word load, int value) - { - return ((load & ~0xff) | value); - } - - static SH4Buffer::TwoShorts placeConstantPoolBarrier(int offset) - { - ASSERT(((offset >> 1) <=2047) && ((offset >> 1) >= -2048)); - - SH4Buffer::TwoShorts m_barrier; - m_barrier.high = (BRA_OPCODE | (offset >> 1)); - m_barrier.low = NOP_OPCODE; - printInstr(((BRA_OPCODE | (offset >> 1))), 0); - printInstr(NOP_OPCODE, 0); - return m_barrier; - } - - static void patchConstantPoolLoad(void* loadAddr, void* constPoolAddr) - { - SH4Word* instructionPtr = reinterpret_cast<SH4Word*>(loadAddr); - SH4Word instruction = *instructionPtr; - SH4Word index = instruction & 0xff; - - if ((instruction & 0xf000) != MOVIMM_OPCODE) - return; - - ASSERT((((reinterpret_cast<uint32_t>(constPoolAddr) - reinterpret_cast<uint32_t>(loadAddr)) + index * 4)) < 1024); - - int offset = reinterpret_cast<uint32_t>(constPoolAddr) + (index * 4) - ((reinterpret_cast<uint32_t>(instructionPtr) & ~0x03) + 4); - instruction &=0xf00; - instruction |= 0xd000; - offset &= 0x03ff; - instruction |= (offset >> 2); - *instructionPtr = instruction; - printInstr(instruction, reinterpret_cast<uint32_t>(loadAddr)); - } - - static void repatchPointer(void* where, void* value) - { - 
patchPointer(where, value); - } - - static void* readPointer(void* code) - { - return reinterpret_cast<void*>(readInt32(code)); - } - - static void repatchInt32(void* where, int32_t value) - { - uint16_t* instructionPtr = reinterpret_cast<uint16_t*>(where); - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, value); - } - - static void repatchCompact(void* where, int32_t value) - { - ASSERT(value >= 0); - ASSERT(value <= 60); - *reinterpret_cast<uint16_t*>(where) = ((*reinterpret_cast<uint16_t*>(where) & 0xfff0) | (value >> 2)); - cacheFlush(reinterpret_cast<uint16_t*>(where), sizeof(uint16_t)); - } - - static void relinkCall(void* from, void* to) - { - uint16_t* instructionPtr = reinterpret_cast<uint16_t*>(from); - instructionPtr -= 3; - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, reinterpret_cast<uint32_t>(to)); - } - - static void relinkJump(void* from, void* to) - { - uint16_t* instructionPtr = reinterpret_cast<uint16_t*> (from); - uint16_t instruction = *instructionPtr; - int32_t offsetBits = (reinterpret_cast<uint32_t>(to) - reinterpret_cast<uint32_t>(from)); - - if (((*instructionPtr & 0xff00) == BT_OPCODE) || ((*instructionPtr & 0xff00) == BF_OPCODE)) { - offsetBits -= 8; - instructionPtr++; - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits); - instruction = (BRAF_OPCODE | (*instructionPtr++ & 0xf00)); - *instructionPtr = instruction; - printBlockInstr(instructionPtr, reinterpret_cast<uint32_t>(from) + 1, 3); - return; - } - - ASSERT((*(instructionPtr + 1) & BRAF_OPCODE) == BRAF_OPCODE); - offsetBits -= 4; - if (offsetBits >= -4096 && offsetBits <= 4094) { - *instructionPtr = getOpcodeGroup6(BRA_OPCODE, offsetBits >> 1); - *(++instructionPtr) = NOP_OPCODE; - printBlockInstr(instructionPtr - 2, reinterpret_cast<uint32_t>(from), 2); - return; - } - - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits - 2); - printInstr(*instructionPtr, 
reinterpret_cast<uint32_t>(from)); - } - - // Linking & patching - - static void revertJump(void* instructionStart, SH4Word imm) - { - SH4Word *insn = reinterpret_cast<SH4Word*>(instructionStart); - SH4Word disp; - - ASSERT((insn[0] & 0xf000) == MOVL_READ_OFFPC_OPCODE); - - disp = insn[0] & 0x00ff; - insn += 2 + (disp << 1); // PC += 4 + (disp*4) - insn = (SH4Word *) ((unsigned) insn & (~3)); - insn[0] = imm; - cacheFlush(insn, sizeof(SH4Word)); - } - - void linkJump(AssemblerLabel from, AssemblerLabel to, JumpType type = JumpFar) - { - ASSERT(to.isSet()); - ASSERT(from.isSet()); - - uint16_t* instructionPtr = getInstructionPtr(data(), from.m_offset); - uint16_t instruction = *instructionPtr; - int offsetBits; - - if (type == JumpNear) { - ASSERT((instruction == BT_OPCODE) || (instruction == BF_OPCODE) || (instruction == BRA_OPCODE)); - int offset = (codeSize() - from.m_offset) - 4; - *instructionPtr++ = instruction | (offset >> 1); - printInstr(*instructionPtr, from.m_offset + 2); - return; - } - - if (((instruction & 0xff00) == BT_OPCODE) || ((instruction & 0xff00) == BF_OPCODE)) { - /* BT label => BF 2 - nop LDR reg - nop braf @reg - nop nop - */ - offsetBits = (to.m_offset - from.m_offset) - 8; - instruction ^= 0x0202; - *instructionPtr++ = instruction; - if ((*instructionPtr & 0xf000) == 0xe000) { - uint32_t* addr = getLdrImmAddressOnPool(instructionPtr, m_buffer.poolAddress()); - *addr = offsetBits; - } else - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits); - instruction = (BRAF_OPCODE | (*instructionPtr++ & 0xf00)); - *instructionPtr = instruction; - printBlockInstr(instructionPtr - 2, from.m_offset, 3); - return; - } - - /* MOV # imm, reg => LDR reg - braf @reg braf @reg - nop nop - */ - ASSERT((*(instructionPtr + 1) & BRAF_OPCODE) == BRAF_OPCODE); - offsetBits = (to.m_offset - from.m_offset) - 4; - if (offsetBits >= -4096 && offsetBits <= 4094) { - *instructionPtr = getOpcodeGroup6(BRA_OPCODE, offsetBits >> 1); - 
*(++instructionPtr) = NOP_OPCODE; - printBlockInstr(instructionPtr - 1, from.m_offset, 2); - return; - } - - instruction = *instructionPtr; - if ((instruction & 0xf000) == 0xe000) { - uint32_t* addr = getLdrImmAddressOnPool(instructionPtr, m_buffer.poolAddress()); - *addr = offsetBits - 2; - printInstr(*instructionPtr, from.m_offset + 2); - return; - } - - changePCrelativeAddress((*instructionPtr & 0xff), instructionPtr, offsetBits - 2); - printInstr(*instructionPtr, from.m_offset + 2); - } - - static void* getRelocatedAddress(void* code, AssemblerLabel label) - { - return reinterpret_cast<void*>(reinterpret_cast<char*>(code) + label.m_offset); - } - - static int getDifferenceBetweenLabels(AssemblerLabel a, AssemblerLabel b) - { - return b.m_offset - a.m_offset; - } - - static void patchPointer(void* code, AssemblerLabel where, void* value) - { - patchPointer(reinterpret_cast<uint32_t*>(code) + where.m_offset, value); - } - - static void patchPointer(void* code, void* value) - { - patchInt32(code, reinterpret_cast<uint32_t>(value)); - } - - static void patchInt32(void* code, uint32_t value) - { - changePCrelativeAddress((*(reinterpret_cast<uint16_t*>(code)) & 0xff), reinterpret_cast<uint16_t*>(code), value); - } - - static uint32_t readInt32(void* code) - { - return readPCrelativeAddress((*(reinterpret_cast<uint16_t*>(code)) & 0xff), reinterpret_cast<uint16_t*>(code)); - } - - static void* readCallTarget(void* from) - { - uint16_t* instructionPtr = static_cast<uint16_t*>(from); - instructionPtr -= 3; - return reinterpret_cast<void*>(readPCrelativeAddress((*instructionPtr & 0xff), instructionPtr)); - } - - PassRefPtr<ExecutableMemoryHandle> executableCopy(JSGlobalData& globalData, void* ownerUID, JITCompilationEffort effort) - { - return m_buffer.executableCopy(globalData, ownerUID, effort); - } - - static void cacheFlush(void* code, size_t size) - { -#if !OS(LINUX) -#error "The cacheFlush support is missing on this platform." 
-#elif defined CACHEFLUSH_D_L2 - syscall(__NR_cacheflush, reinterpret_cast<unsigned>(code), size, CACHEFLUSH_D_WB | CACHEFLUSH_I | CACHEFLUSH_D_L2); -#else - syscall(__NR_cacheflush, reinterpret_cast<unsigned>(code), size, CACHEFLUSH_D_WB | CACHEFLUSH_I); -#endif - } - - void prefix(uint16_t pre) - { - m_buffer.putByte(pre); - } - - void oneShortOp(uint16_t opcode, bool checksize = true, bool isDouble = true) - { - printInstr(opcode, m_buffer.codeSize(), isDouble); - if (checksize) - m_buffer.ensureSpace(maxInstructionSize); - m_buffer.putShortUnchecked(opcode); - } - - void ensureSpace(int space) - { - m_buffer.ensureSpace(space); - } - - void ensureSpace(int insnSpace, int constSpace) - { - m_buffer.ensureSpace(insnSpace, constSpace); - } - - // Administrative methods - - void* data() const { return m_buffer.data(); } - size_t codeSize() const { return m_buffer.codeSize(); } - -#ifdef SH4_ASSEMBLER_TRACING - static void printInstr(uint16_t opc, unsigned size, bool isdoubleInst = true) - { - if (!getenv("JavaScriptCoreDumpJIT")) - return; - - const char *format = 0; - printfStdoutInstr("offset: 0x%8.8x\t", size); - switch (opc) { - case BRK_OPCODE: - format = " BRK\n"; - break; - case NOP_OPCODE: - format = " NOP\n"; - break; - case RTS_OPCODE: - format =" *RTS\n"; - break; - case SETS_OPCODE: - format = " SETS\n"; - break; - case SETT_OPCODE: - format = " SETT\n"; - break; - case CLRT_OPCODE: - format = " CLRT\n"; - break; - case FSCHG_OPCODE: - format = " FSCHG\n"; - break; - } - if (format) { - printfStdoutInstr(format); - return; - } - switch (opc & 0xf0ff) { - case BRAF_OPCODE: - format = " *BRAF R%d\n"; - break; - case DT_OPCODE: - format = " DT R%d\n"; - break; - case CMPPL_OPCODE: - format = " CMP/PL R%d\n"; - break; - case CMPPZ_OPCODE: - format = " CMP/PZ R%d\n"; - break; - case JMP_OPCODE: - format = " *JMP @R%d\n"; - break; - case JSR_OPCODE: - format = " *JSR @R%d\n"; - break; - case LDSPR_OPCODE: - format = " LDS R%d, PR\n"; - break; - case 
LDSLPR_OPCODE: - format = " LDS.L @R%d+, PR\n"; - break; - case MOVT_OPCODE: - format = " MOVT R%d\n"; - break; - case SHAL_OPCODE: - format = " SHAL R%d\n"; - break; - case SHAR_OPCODE: - format = " SHAR R%d\n"; - break; - case SHLL_OPCODE: - format = " SHLL R%d\n"; - break; - case SHLL2_OPCODE: - format = " SHLL2 R%d\n"; - break; - case SHLL8_OPCODE: - format = " SHLL8 R%d\n"; - break; - case SHLL16_OPCODE: - format = " SHLL16 R%d\n"; - break; - case SHLR_OPCODE: - format = " SHLR R%d\n"; - break; - case SHLR2_OPCODE: - format = " SHLR2 R%d\n"; - break; - case SHLR8_OPCODE: - format = " SHLR8 R%d\n"; - break; - case SHLR16_OPCODE: - format = " SHLR16 R%d\n"; - break; - case STSPR_OPCODE: - format = " STS PR, R%d\n"; - break; - case STSLPR_OPCODE: - format = " STS.L PR, @-R%d\n"; - break; - case LDS_RM_FPUL_OPCODE: - format = " LDS R%d, FPUL\n"; - break; - case STS_FPUL_RN_OPCODE: - format = " STS FPUL, R%d \n"; - break; - case FLDS_FRM_FPUL_OPCODE: - format = " FLDS FR%d, FPUL\n"; - break; - case FSTS_FPUL_FRN_OPCODE: - format = " FSTS FPUL, R%d \n"; - break; - case LDSFPSCR_OPCODE: - format = " LDS R%d, FPSCR \n"; - break; - case STSFPSCR_OPCODE: - format = " STS FPSCR, R%d \n"; - break; - case STSMACL_OPCODE: - format = " STS MACL, R%d \n"; - break; - case STSMACH_OPCODE: - format = " STS MACH, R%d \n"; - break; - case BSRF_OPCODE: - format = " *BSRF R%d"; - break; - case FTRC_OPCODE: - format = " FTRC FR%d, FPUL\n"; - break; - } - if (format) { - printfStdoutInstr(format, getRn(opc)); - return; - } - switch (opc & 0xf0ff) { - case FNEG_OPCODE: - format = " FNEG DR%d\n"; - break; - case FLOAT_OPCODE: - format = " FLOAT DR%d\n"; - break; - case FTRC_OPCODE: - format = " FTRC FR%d, FPUL\n"; - break; - case FSQRT_OPCODE: - format = " FSQRT FR%d\n"; - break; - case FCNVDS_DRM_FPUL_OPCODE: - format = " FCNVDS FR%d, FPUL\n"; - break; - case FCNVSD_FPUL_DRN_OPCODE: - format = " FCNVSD FPUL, FR%d\n"; - break; - } - if (format) { - if (isdoubleInst) - 
printfStdoutInstr(format, getDRn(opc) << 1); - else - printfStdoutInstr(format, getRn(opc)); - return; - } - switch (opc & 0xf00f) { - case ADD_OPCODE: - format = " ADD R%d, R%d\n"; - break; - case ADDC_OPCODE: - format = " ADDC R%d, R%d\n"; - break; - case ADDV_OPCODE: - format = " ADDV R%d, R%d\n"; - break; - case AND_OPCODE: - format = " AND R%d, R%d\n"; - break; - case DIV1_OPCODE: - format = " DIV1 R%d, R%d\n"; - break; - case CMPEQ_OPCODE: - format = " CMP/EQ R%d, R%d\n"; - break; - case CMPGE_OPCODE: - format = " CMP/GE R%d, R%d\n"; - break; - case CMPGT_OPCODE: - format = " CMP/GT R%d, R%d\n"; - break; - case CMPHI_OPCODE: - format = " CMP/HI R%d, R%d\n"; - break; - case CMPHS_OPCODE: - format = " CMP/HS R%d, R%d\n"; - break; - case MOV_OPCODE: - format = " MOV R%d, R%d\n"; - break; - case MOVB_WRITE_RN_OPCODE: - format = " MOV.B R%d, @R%d\n"; - break; - case MOVB_WRITE_RNDEC_OPCODE: - format = " MOV.B R%d, @-R%d\n"; - break; - case MOVB_WRITE_R0RN_OPCODE: - format = " MOV.B R%d, @(R0, R%d)\n"; - break; - case MOVB_READ_RM_OPCODE: - format = " MOV.B @R%d, R%d\n"; - break; - case MOVB_READ_RMINC_OPCODE: - format = " MOV.B @R%d+, R%d\n"; - break; - case MOVB_READ_R0RM_OPCODE: - format = " MOV.B @(R0, R%d), R%d\n"; - break; - case MOVL_WRITE_RN_OPCODE: - format = " MOV.L R%d, @R%d\n"; - break; - case MOVL_WRITE_RNDEC_OPCODE: - format = " MOV.L R%d, @-R%d\n"; - break; - case MOVL_WRITE_R0RN_OPCODE: - format = " MOV.L R%d, @(R0, R%d)\n"; - break; - case MOVL_READ_RM_OPCODE: - format = " MOV.L @R%d, R%d\n"; - break; - case MOVL_READ_RMINC_OPCODE: - format = " MOV.L @R%d+, R%d\n"; - break; - case MOVL_READ_R0RM_OPCODE: - format = " MOV.L @(R0, R%d), R%d\n"; - break; - case MULL_OPCODE: - format = " MUL.L R%d, R%d\n"; - break; - case DMULL_L_OPCODE: - format = " DMULU.L R%d, R%d\n"; - break; - case DMULSL_OPCODE: - format = " DMULS.L R%d, R%d\n"; - break; - case NEG_OPCODE: - format = " NEG R%d, R%d\n"; - break; - case NEGC_OPCODE: - format = " NEGC R%d, R%d\n"; - 
break; - case NOT_OPCODE: - format = " NOT R%d, R%d\n"; - break; - case OR_OPCODE: - format = " OR R%d, R%d\n"; - break; - case SHAD_OPCODE: - format = " SHAD R%d, R%d\n"; - break; - case SHLD_OPCODE: - format = " SHLD R%d, R%d\n"; - break; - case SUB_OPCODE: - format = " SUB R%d, R%d\n"; - break; - case SUBC_OPCODE: - format = " SUBC R%d, R%d\n"; - break; - case SUBV_OPCODE: - format = " SUBV R%d, R%d\n"; - break; - case TST_OPCODE: - format = " TST R%d, R%d\n"; - break; - case XOR_OPCODE: - format = " XOR R%d, R%d\n";break; - case MOVW_WRITE_RN_OPCODE: - format = " MOV.W R%d, @R%d\n"; - break; - case MOVW_READ_RM_OPCODE: - format = " MOV.W @R%d, R%d\n"; - break; - case MOVW_READ_R0RM_OPCODE: - format = " MOV.W @(R0, R%d), R%d\n"; - break; - case EXTUB_OPCODE: - format = " EXTU.B R%d, R%d\n"; - break; - case EXTUW_OPCODE: - format = " EXTU.W R%d, R%d\n"; - break; - } - if (format) { - printfStdoutInstr(format, getRm(opc), getRn(opc)); - return; - } - switch (opc & 0xf00f) { - case FSUB_OPCODE: - format = " FSUB FR%d, FR%d\n"; - break; - case FADD_OPCODE: - format = " FADD FR%d, FR%d\n"; - break; - case FDIV_OPCODE: - format = " FDIV FR%d, FR%d\n"; - break; - case FMUL_OPCODE: - format = " DMULL FR%d, FR%d\n"; - break; - case FMOV_OPCODE: - format = " FMOV FR%d, FR%d\n"; - break; - case FCMPEQ_OPCODE: - format = " FCMP/EQ FR%d, FR%d\n"; - break; - case FCMPGT_OPCODE: - format = " FCMP/GT FR%d, FR%d\n"; - break; - } - if (format) { - if (isdoubleInst) - printfStdoutInstr(format, getDRm(opc) << 1, getDRn(opc) << 1); - else - printfStdoutInstr(format, getRm(opc), getRn(opc)); - return; - } - switch (opc & 0xf00f) { - case FMOVS_WRITE_RN_DEC_OPCODE: - format = " %s FR%d, @-R%d\n"; - break; - case FMOVS_WRITE_RN_OPCODE: - format = " %s FR%d, @R%d\n"; - break; - case FMOVS_WRITE_R0RN_OPCODE: - format = " %s FR%d, @(R0, R%d)\n"; - break; - } - if (format) { - if (isdoubleInst) - printfStdoutInstr(format, "FMOV", getDRm(opc) << 1, getDRn(opc)); - else - 
printfStdoutInstr(format, "FMOV.S", getRm(opc), getRn(opc)); - return; - } - switch (opc & 0xf00f) { - case FMOVS_READ_RM_OPCODE: - format = " %s @R%d, FR%d\n"; - break; - case FMOVS_READ_RM_INC_OPCODE: - format = " %s @R%d+, FR%d\n"; - break; - case FMOVS_READ_R0RM_OPCODE: - format = " %s @(R0, R%d), FR%d\n"; - break; - } - if (format) { - if (isdoubleInst) - printfStdoutInstr(format, "FMOV", getDRm(opc), getDRn(opc) << 1); - else - printfStdoutInstr(format, "FMOV.S", getRm(opc), getRn(opc)); - return; - } - switch (opc & 0xff00) { - case BF_OPCODE: - format = " BF %d\n"; - break; - case BFS_OPCODE: - format = " *BF/S %d\n"; - break; - case ANDIMM_OPCODE: - format = " AND #%d, R0\n"; - break; - case BT_OPCODE: - format = " BT %d\n"; - break; - case BTS_OPCODE: - format = " *BT/S %d\n"; - break; - case CMPEQIMM_OPCODE: - format = " CMP/EQ #%d, R0\n"; - break; - case MOVB_WRITE_OFFGBR_OPCODE: - format = " MOV.B R0, @(%d, GBR)\n"; - break; - case MOVB_READ_OFFGBR_OPCODE: - format = " MOV.B @(%d, GBR), R0\n"; - break; - case MOVL_WRITE_OFFGBR_OPCODE: - format = " MOV.L R0, @(%d, GBR)\n"; - break; - case MOVL_READ_OFFGBR_OPCODE: - format = " MOV.L @(%d, GBR), R0\n"; - break; - case MOVA_READ_OFFPC_OPCODE: - format = " MOVA @(%d, PC), R0\n"; - break; - case ORIMM_OPCODE: - format = " OR #%d, R0\n"; - break; - case ORBIMM_OPCODE: - format = " OR.B #%d, @(R0, GBR)\n"; - break; - case TSTIMM_OPCODE: - format = " TST #%d, R0\n"; - break; - case TSTB_OPCODE: - format = " TST.B %d, @(R0, GBR)\n"; - break; - case XORIMM_OPCODE: - format = " XOR #%d, R0\n"; - break; - case XORB_OPCODE: - format = " XOR.B %d, @(R0, GBR)\n"; - break; - } - if (format) { - printfStdoutInstr(format, getImm8(opc)); - return; - } - switch (opc & 0xff00) { - case MOVB_WRITE_OFFRN_OPCODE: - format = " MOV.B R0, @(%d, R%d)\n"; - break; - case MOVB_READ_OFFRM_OPCODE: - format = " MOV.B @(%d, R%d), R0\n"; - break; - } - if (format) { - printfStdoutInstr(format, getDisp(opc), getRm(opc)); - return; - } - 
switch (opc & 0xf000) { - case BRA_OPCODE: - format = " *BRA %d\n"; - break; - case BSR_OPCODE: - format = " *BSR %d\n"; - break; - } - if (format) { - printfStdoutInstr(format, getImm12(opc)); - return; - } - switch (opc & 0xf000) { - case MOVL_READ_OFFPC_OPCODE: - format = " MOV.L @(%d, PC), R%d\n"; - break; - case ADDIMM_OPCODE: - format = " ADD #%d, R%d\n"; - break; - case MOVIMM_OPCODE: - format = " MOV #%d, R%d\n"; - break; - case MOVW_READ_OFFPC_OPCODE: - format = " MOV.W @(%d, PC), R%d\n"; - break; - } - if (format) { - printfStdoutInstr(format, getImm8(opc), getRn(opc)); - return; - } - switch (opc & 0xf000) { - case MOVL_WRITE_OFFRN_OPCODE: - format = " MOV.L R%d, @(%d, R%d)\n"; - printfStdoutInstr(format, getRm(opc), getDisp(opc), getRn(opc)); - break; - case MOVL_READ_OFFRM_OPCODE: - format = " MOV.L @(%d, R%d), R%d\n"; - printfStdoutInstr(format, getDisp(opc), getRm(opc), getRn(opc)); - break; - } - } - - static void printfStdoutInstr(const char* format, ...) - { - if (getenv("JavaScriptCoreDumpJIT")) { - va_list args; - va_start(args, format); - vprintfStdoutInstr(format, args); - va_end(args); - } - } - - static void vprintfStdoutInstr(const char* format, va_list args) - { - if (getenv("JavaScriptCoreDumpJIT")) - WTF::dataLogFV(format, args); - } - - static void printBlockInstr(uint16_t* first, unsigned offset, int nbInstr) - { - printfStdoutInstr(">> repatch instructions after link\n"); - for (int i = 0; i <= nbInstr; i++) - printInstr(*(first + i), offset + i); - printfStdoutInstr(">> end repatch\n"); - } -#else - static void printInstr(uint16_t opc, unsigned size, bool isdoubleInst = true) { }; - static void printBlockInstr(uint16_t* first, unsigned offset, int nbInstr) { }; -#endif - - static void replaceWithLoad(void* instructionStart) - { - SH4Word* insPtr = reinterpret_cast<SH4Word*>(instructionStart); - - insPtr += 2; // skip MOV and ADD opcodes - - if (((*insPtr) & 0xf00f) != MOVL_READ_RM_OPCODE) { - *insPtr = MOVL_READ_RM_OPCODE | (*insPtr 
& 0x0ff0); - cacheFlush(insPtr, sizeof(SH4Word)); - } - } - - static void replaceWithAddressComputation(void* instructionStart) - { - SH4Word* insPtr = reinterpret_cast<SH4Word*>(instructionStart); - - insPtr += 2; // skip MOV and ADD opcodes - - if (((*insPtr) & 0xf00f) != MOV_OPCODE) { - *insPtr = MOV_OPCODE | (*insPtr & 0x0ff0); - cacheFlush(insPtr, sizeof(SH4Word)); - } - } - -private: - SH4Buffer m_buffer; - int m_claimscratchReg; -}; - -} // namespace JSC - -#endif // ENABLE(ASSEMBLER) && CPU(SH4) - -#endif // SH4Assembler_h diff --git a/src/3rdparty/masm/assembler/X86Assembler.h b/src/3rdparty/masm/assembler/X86Assembler.h index b71cf290f8..2257cb2b9a 100644 --- a/src/3rdparty/masm/assembler/X86Assembler.h +++ b/src/3rdparty/masm/assembler/X86Assembler.h @@ -26,6 +26,8 @@ #ifndef X86Assembler_h #define X86Assembler_h +#include <Platform.h> + #if ENABLE(ASSEMBLER) && (CPU(X86) || CPU(X86_64)) #include "AssemblerBuffer.h" @@ -1417,11 +1419,22 @@ public: { m_formatter.oneByteOp(OP_LEA, dst, base, offset); } + + void leal_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + m_formatter.oneByteOp(OP_LEA, dst, base, index, scale, offset); + } + #if CPU(X86_64) void leaq_mr(int offset, RegisterID base, RegisterID dst) { m_formatter.oneByteOp64(OP_LEA, dst, base, offset); } + + void leaq_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + m_formatter.oneByteOp64(OP_LEA, dst, base, index, scale, offset); + } #endif // Flow control: @@ -1892,9 +1905,18 @@ public: ASSERT(to.isSet()); char* code = reinterpret_cast<char*>(m_formatter.data()); - ASSERT(!reinterpret_cast<int32_t*>(code + from.m_offset)[-1]); + ASSERT(!loadPossiblyUnaligned<int32_t>(code, from.m_offset, -1)); setRel32(code + from.m_offset, code + to.m_offset); } + + template<typename T> + T loadPossiblyUnaligned(char *ptr, size_t offset, int idx) + { + T *t_ptr = &reinterpret_cast<T*>(ptr + offset)[idx]; + T val; + memcpy(&val, t_ptr, sizeof(T)); 
+ return val; + } static void linkJump(void* code, AssemblerLabel from, void* to) { @@ -2095,7 +2117,14 @@ private: static void setInt32(void* where, int32_t value) { - reinterpret_cast<int32_t*>(where)[-1] = value; + storePossiblyUnaligned<int32_t>(where, -1, value); + } + + template <typename T> + static void storePossiblyUnaligned(void *where, int idx, T value) + { + T *ptr = &reinterpret_cast<T*>(where)[idx]; + memcpy(ptr, &value, sizeof(T)); } static void setInt8(void* where, int8_t value) diff --git a/src/3rdparty/masm/masm-defs.pri b/src/3rdparty/masm/masm-defs.pri index 34d6a67451..08c46a7ac2 100644 --- a/src/3rdparty/masm/masm-defs.pri +++ b/src/3rdparty/masm/masm-defs.pri @@ -20,6 +20,7 @@ INCLUDEPATH += $$PWD/assembler INCLUDEPATH += $$PWD/runtime INCLUDEPATH += $$PWD/wtf INCLUDEPATH += $$PWD/stubs +INCLUDEPATH += $$PWD/stubs/runtime INCLUDEPATH += $$PWD/stubs/wtf INCLUDEPATH += $$PWD diff --git a/src/3rdparty/masm/masm.pri b/src/3rdparty/masm/masm.pri index 7dfb24f4b8..0e63ac2ce5 100644 --- a/src/3rdparty/masm/masm.pri +++ b/src/3rdparty/masm/masm.pri @@ -79,10 +79,12 @@ HEADERS += $$PWD/disassembler/ARM64/A64DOpcode.h !qmldevtools_build { SOURCES += $$PWD/yarr/YarrCanonicalizeUCS2.cpp \ + $$PWD/yarr/YarrCanonicalizeUnicode.cpp \ $$PWD/yarr/YarrInterpreter.cpp \ $$PWD/yarr/YarrJIT.cpp \ $$PWD/yarr/YarrPattern.cpp \ - $$PWD/yarr/YarrSyntaxChecker.cpp + $$PWD/yarr/YarrSyntaxChecker.cpp \ + $$PWD/stubs/yarr/YarrUnicodeProperties.cpp HEADERS += $$PWD/yarr/Yarr.h \ $$PWD/yarr/YarrCanonicalizeUCS2.h \ @@ -90,7 +92,8 @@ HEADERS += $$PWD/yarr/Yarr.h \ $$PWD/yarr/YarrJIT.h \ $$PWD/yarr/YarrParser.h \ $$PWD/yarr/YarrPattern.h \ - $$PWD/yarr/YarrSyntaxChecker.h + $$PWD/yarr/YarrSyntaxChecker.h \ + $$PWD/yarr/YarrUnicodeProperties.h } # @@ -107,7 +110,7 @@ debug_and_release { INCLUDEPATH += $$GENERATEDDIR retgen.output = $$GENERATEDDIR/RegExpJitTables.h -retgen.script = $$PWD/create_regex_tables +retgen.script = $$PWD/yarr/create_regex_tables retgen.input = 
retgen.script retgen.CONFIG += no_link retgen.commands = python $$retgen.script > ${QMAKE_FILE_OUT} @@ -125,3 +128,8 @@ QMAKE_EXTRA_COMPILERS += retgen } } } + +linux { + requires(qtConfig(dlopen)) + QMAKE_USE_PRIVATE += libdl +} diff --git a/src/3rdparty/masm/qt_attribution.json b/src/3rdparty/masm/qt_attribution.json index f6803c6b1c..aab413ad40 100644 --- a/src/3rdparty/masm/qt_attribution.json +++ b/src/3rdparty/masm/qt_attribution.json @@ -8,7 +8,7 @@ "License": "BSD 2-clause \"Simplified\" License", "LicenseId": "BSD-2-Clause", "LicenseFile": "LICENSE", - "Copyright": "Copyright (C) 2003-2015 Apple Inc. All rights reserved. + "Copyright": "Copyright (C) 2003-2018 Apple Inc. All rights reserved. Copyright (C) 2007 Justin Haygood (jhaygood@reaktix.com) Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. (http://www.torchmobile.com/) Copyright (C) 2009, 2010 University of Szeged diff --git a/src/3rdparty/masm/stubs/ExecutableAllocator.h b/src/3rdparty/masm/stubs/ExecutableAllocator.h index 16b17bd3cd..156b24b4e8 100644 --- a/src/3rdparty/masm/stubs/ExecutableAllocator.h +++ b/src/3rdparty/masm/stubs/ExecutableAllocator.h @@ -82,7 +82,7 @@ struct ExecutableMemoryHandle : public RefCounted<ExecutableMemoryHandle> { inline bool isManaged() const { return true; } - void* start() { return m_allocation->start(); } + void *start() { return m_allocation->start(); } size_t sizeInBytes() { return m_size; } QV4::ExecutableAllocator::ChunkOfPages *chunk() const @@ -98,7 +98,7 @@ struct ExecutableAllocator { : realAllocator(alloc) {} - PassRefPtr<ExecutableMemoryHandle> allocate(JSGlobalData&, size_t size, void*, int) + Ref<ExecutableMemoryHandle> allocate(JSGlobalData&, size_t size, void*, int) { return adoptRef(new ExecutableMemoryHandle(realAllocator, size)); } diff --git a/src/3rdparty/masm/stubs/Options.h b/src/3rdparty/masm/stubs/Options.h index e03cc67690..6339c06033 100644 --- a/src/3rdparty/masm/stubs/Options.h +++ b/src/3rdparty/masm/stubs/Options.h @@ 
-44,6 +44,8 @@ namespace JSC { struct Options { static bool showDisassembly(); static bool showDFGDisassembly() { return true; } + static bool zeroStackFrame() { return true; } + static bool dumpCompiledRegExpPatterns() { return false; } }; } diff --git a/src/3rdparty/masm/stubs/SuperSampler.h b/src/3rdparty/masm/stubs/SuperSampler.h new file mode 100644 index 0000000000..422de528e1 --- /dev/null +++ b/src/3rdparty/masm/stubs/SuperSampler.h @@ -0,0 +1,50 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. 
The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#pragma once + +namespace WTF { + +struct SuperSamplerScope { + SuperSamplerScope(bool) {} +}; + +} + +using WTF::SuperSamplerScope; diff --git a/src/3rdparty/masm/stubs/WTFStubs.cpp b/src/3rdparty/masm/stubs/WTFStubs.cpp index ea7e2d78e0..b26d10b3ab 100644 --- a/src/3rdparty/masm/stubs/WTFStubs.cpp +++ b/src/3rdparty/masm/stubs/WTFStubs.cpp @@ -91,7 +91,7 @@ void dataLogFV(const char* format, va_list args) { char buffer[1024]; qvsnprintf(buffer, sizeof(buffer), format, args); - qDebug("%s", buffer); + qDebug().nospace().noquote() << buffer; } void dataLogF(const char* format, ...) @@ -101,12 +101,12 @@ void dataLogF(const char* format, ...) va_start(args, format); qvsnprintf(buffer, sizeof(buffer), format, args); va_end(args); - qDebug("%s", buffer); + qDebug().nospace().noquote() << buffer; } void dataLogFString(const char* str) { - qDebug("%s", str); + qDebug().nospace().noquote() << str; } } diff --git a/src/3rdparty/masm/stubs/runtime/ConcurrentJSLock.h b/src/3rdparty/masm/stubs/runtime/ConcurrentJSLock.h new file mode 100644 index 0000000000..43868feadb --- /dev/null +++ b/src/3rdparty/masm/stubs/runtime/ConcurrentJSLock.h @@ -0,0 +1,53 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#pragma once + +namespace JSC { + +class NoLock { +public: + void lock() { } + void unlock() { } + bool isHeld() { return false; } +}; + +typedef NoLock ConcurrentJSLock; + +} // namespace JSC diff --git a/src/3rdparty/masm/stubs/runtime/RegExpKey.h b/src/3rdparty/masm/stubs/runtime/RegExpKey.h new file mode 100644 index 0000000000..392f66fb83 --- /dev/null +++ b/src/3rdparty/masm/stubs/runtime/RegExpKey.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2010 University of Szeged + * Copyright (C) 2010 Renata Hodovan (hodovan@inf.u-szeged.hu) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include <wtf/text/WTFString.h> + +namespace JSC { + +enum RegExpFlags : int8_t { + NoFlags = 0, + FlagGlobal = 1, + FlagIgnoreCase = 2, + FlagMultiline = 4, + FlagSticky = 8, + FlagUnicode = 16, + FlagDotAll = 32, + InvalidFlags = 64, + DeletedValueFlags = -1 +}; + +} // namespace JSC diff --git a/src/3rdparty/masm/stubs/runtime/VM.h b/src/3rdparty/masm/stubs/runtime/VM.h new file mode 100644 index 0000000000..94cce814f3 --- /dev/null +++ b/src/3rdparty/masm/stubs/runtime/VM.h @@ -0,0 +1,50 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. 
The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef MASM_VM_H +#define MASM_VM_H + +#include <qv4engine_p.h> + +namespace JSC { + +class VM : public QV4::ExecutionEngine {}; + +} + +#endif // MASM_VM_H diff --git a/src/3rdparty/masm/stubs/wtf/HashMap.h b/src/3rdparty/masm/stubs/wtf/HashMap.h new file mode 100644 index 0000000000..888c6cceb0 --- /dev/null +++ b/src/3rdparty/masm/stubs/wtf/HashMap.h @@ -0,0 +1,58 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef HASHMAP_H +#define HASHMAP_H + +#include <QtCore/qhash.h> + +namespace WTF { + +template<typename Key, typename Value> +class HashMap final : public QHash<Key, Value> +{ +public: + void add(const Key &k, const Value &v) { QHash<Key, Value>::insert(k, v); } + Value get(const Key &k) { return QHash<Key, Value>::value(k); } +}; + +} + +using WTF::HashMap; + +#endif diff --git a/src/3rdparty/masm/stubs/wtf/HashSet.h b/src/3rdparty/masm/stubs/wtf/HashSet.h new file mode 100644 index 0000000000..3765c9a8b1 --- /dev/null +++ b/src/3rdparty/masm/stubs/wtf/HashSet.h @@ -0,0 +1,67 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef HASHSET_H +#define HASHSET_H + +#include <QtCore/qset.h> + +namespace WTF { + +template<typename Key> +class HashSet final : public QSet<Key> +{ +public: + struct SetAddResult { + bool isNewEntry; + }; + SetAddResult add(const Key &k) + { + if (QSet<Key>::find(k) == QSet<Key>::constEnd()) { + QSet<Key>::insert(k); + return { true }; + } + return { false }; + } +}; + +} + +using WTF::HashSet; + +#endif diff --git a/src/3rdparty/masm/stubs/wtf/Optional.h b/src/3rdparty/masm/stubs/wtf/Optional.h new file mode 100644 index 0000000000..44fa3ee62d --- /dev/null +++ b/src/3rdparty/masm/stubs/wtf/Optional.h @@ -0,0 +1,83 @@ +/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. 
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#pragma once + +#include <QtCore/qglobal.h> + +#if __cplusplus > 201402L && QT_HAS_INCLUDE(<optional>) +#include <optional> +#else + +namespace std { + +struct nullopt_t {}; + +constexpr nullopt_t nullopt {}; + +template<typename T> +class optional { +public: + optional() = default; + optional(nullopt_t) {} + optional(const T &v) : _value(v), _hasValue(true) {} + ~optional() = default; + + optional &operator =(nullopt_t) { + _value = T(); + _hasValue = false; + return *this; + } + + T operator->() { return _value; } + T operator*() { return _value; } + + operator bool() const { return _hasValue; } + bool has_value() const { return _hasValue; } + + T value() const { return _value; } + +private: + T _value = T(); + bool _hasValue = false; +}; + +} + +#endif diff --git a/src/3rdparty/masm/stubs/wtf/PassRefPtr.h b/src/3rdparty/masm/stubs/wtf/PassRefPtr.h index f072e70dd7..cc03a5d651 100644 --- a/src/3rdparty/masm/stubs/wtf/PassRefPtr.h +++ b/src/3rdparty/masm/stubs/wtf/PassRefPtr.h @@ -83,14 +83,22 @@ public: private: PassRefPtr<T>& operator=(const PassRefPtr<T>& t); - template <typename PtrType> friend PassRefPtr<PtrType> adoptRef(PtrType*); +protected: mutable T* m_ptr; }; template <typename T> -PassRefPtr<T> 
adoptRef(T* ptr) +class Ref : public PassRefPtr<T> { - PassRefPtr<T> result; + using PassRefPtr<T>::PassRefPtr; + + template <typename PtrType> friend Ref<PtrType> adoptRef(PtrType*); +}; + +template <typename T> +Ref<T> adoptRef(T* ptr) +{ + Ref<T> result; result.m_ptr = ptr; return result; } diff --git a/src/3rdparty/masm/stubs/wtf/Vector.h b/src/3rdparty/masm/stubs/wtf/Vector.h index 2025acf8a9..f4f4dc5cf4 100644 --- a/src/3rdparty/masm/stubs/wtf/Vector.h +++ b/src/3rdparty/masm/stubs/wtf/Vector.h @@ -55,6 +55,8 @@ class Vector : public std::vector<T> { public: Vector() {} Vector(int initialSize) : std::vector<T>(initialSize) {} + Vector(const Vector &other) : std::vector<T>(other) {} + Vector(std::initializer_list<T> list) : std::vector<T>(list) {} inline void append(const T& value) { this->push_back(value); } @@ -63,6 +65,9 @@ public: inline void append(const OtherType& other) { this->push_back(T(other)); } + inline void append(T&& other) + { this->push_back(std::move(other)); } + inline void append(const Vector<T>& vector) { this->insert(this->end(), vector.begin(), vector.end()); @@ -80,6 +85,8 @@ public: this->push_back(*it); } + unsigned size() const { return static_cast<unsigned>(std::vector<T>::size()); } + using std::vector<T>::insert; inline void reserveInitialCapacity(size_t size) { this->reserve(size); } diff --git a/src/3rdparty/masm/stubs/wtf/text/CString.h b/src/3rdparty/masm/stubs/wtf/text/CString.h index 26f74f7593..7129f5049e 100644 --- a/src/3rdparty/masm/stubs/wtf/text/CString.h +++ b/src/3rdparty/masm/stubs/wtf/text/CString.h @@ -39,4 +39,8 @@ #ifndef CSTRING_H #define CSTRING_H +class CString : public QByteArray { + +}; + #endif // CSTRING_H diff --git a/src/3rdparty/masm/stubs/wtf/text/StringBuilder.h b/src/3rdparty/masm/stubs/wtf/text/StringBuilder.h new file mode 100644 index 0000000000..a382f6da83 --- /dev/null +++ b/src/3rdparty/masm/stubs/wtf/text/StringBuilder.h @@ -0,0 +1,52 @@ 
+/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#pragma once + +#include <wtf/text/WTFString.h> + +namespace WTF { + +struct StringBuilder : public String +{ + String toString() const { return *this; } +}; + +} + +using WTF::StringBuilder; diff --git a/src/3rdparty/masm/stubs/wtf/text/WTFString.h b/src/3rdparty/masm/stubs/wtf/text/WTFString.h index 928c684fdb..da5183f734 100644 --- a/src/3rdparty/masm/stubs/wtf/text/WTFString.h +++ b/src/3rdparty/masm/stubs/wtf/text/WTFString.h @@ -42,26 +42,33 @@ #include <QString> #include <wtf/ASCIICType.h> #include <wtf/unicode/Unicode.h> +#include <memory> namespace WTF { +class PrintStream; + class String : public QString { public: + String() = default; String(const QString& s) : QString(s) {} bool is8Bit() const { return false; } const unsigned char *characters8() const { return 0; } const UChar *characters16() const { return reinterpret_cast<const UChar*>(constData()); } template <typename T> - const T* getCharacters() const; + const T* characters() const; + + bool operator!() const { return isEmpty(); } + void dump(PrintStream &) const {} }; template <> -inline const unsigned char* String::getCharacters<unsigned char>() const { return characters8(); } +inline const unsigned char* String::characters<unsigned char>() const { return characters8(); } template <> -inline const UChar* String::getCharacters<UChar>() const { return characters16(); } +inline const UChar* String::characters<UChar>() const { return characters16(); } } @@ -70,4 +77,6 @@ namespace JSC { using WTF::String; } +#define WTFMove(value) std::move(value) + #endif // WTFSTRING_H diff --git a/src/3rdparty/masm/stubs/wtf/unicode/Unicode.h b/src/3rdparty/masm/stubs/wtf/unicode/Unicode.h index d61cec5c4e..0f7f005c89 100644 --- a/src/3rdparty/masm/stubs/wtf/unicode/Unicode.h +++ b/src/3rdparty/masm/stubs/wtf/unicode/Unicode.h @@ -43,6 +43,7 @@ typedef unsigned char LChar; typedef unsigned short UChar; 
+typedef int32_t UChar32; namespace Unicode { inline UChar toLower(UChar ch) { @@ -52,6 +53,35 @@ namespace Unicode { inline UChar toUpper(UChar ch) { return QChar::toUpper(ch); } + inline UChar32 u_tolower(UChar32 ch) { + return QChar::toLower(ch); + } + inline UChar32 u_toupper(UChar32 ch) { + return QChar::toUpper(ch); + } } +using Unicode::u_toupper; +using Unicode::u_tolower; + +#define U16_IS_LEAD(ch) QChar::isHighSurrogate((ch)) +#define U16_IS_TRAIL(ch) QChar::isLowSurrogate((ch)) +#define U16_GET_SUPPLEMENTARY(lead, trail) static_cast<UChar32>(QChar::surrogateToUcs4((lead), (trail))) +#define U_IS_BMP(ch) ((ch) < 0x10000) +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) +#define UCHAR_MAX_VALUE 0x10ffff + +#define U_MASK(category) (1u << (category)) +#define U_GET_GC_MASK(c) U_MASK(QChar::category((c))) +#define U_GC_L_MASK (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) +#define U_GC_LU_MASK U_MASK(QChar::Letter_Uppercase) +#define U_GC_LL_MASK U_MASK(QChar::Letter_Lowercase) +#define U_GC_LT_MASK U_MASK(QChar::Letter_Titlecase) +#define U_GC_LM_MASK U_MASK(QChar::Letter_Modifier) +#define U_GC_LO_MASK U_MASK(QChar::Letter_Other) +#define U_GC_MN_MASK U_MASK(QChar::Mark_NonSpacing) +#define U_GC_MC_MASK U_MASK(QChar::Mark_SpacingCombining) +#define U_GC_ND_MASK U_MASK(QChar::Number_DecimalDigit) +#define U_GC_PC_MASK U_MASK(QChar::Punctuation_Connector) + #endif // UNICODE_H diff --git a/src/3rdparty/masm/stubs/wtf/unicode/utypes.h b/src/3rdparty/masm/stubs/wtf/unicode/utypes.h new file mode 100644 index 0000000000..e1b4ff90a6 --- /dev/null +++ b/src/3rdparty/masm/stubs/wtf/unicode/utypes.h @@ -0,0 +1 @@ +#include <unicode/Unicode.h> diff --git a/src/3rdparty/masm/stubs/yarr/YarrUnicodeProperties.cpp b/src/3rdparty/masm/stubs/yarr/YarrUnicodeProperties.cpp new file mode 100644 index 0000000000..99c925f406 --- /dev/null +++ b/src/3rdparty/masm/stubs/yarr/YarrUnicodeProperties.cpp @@ -0,0 +1,70 @@ 
+/**************************************************************************** +** +** Copyright (C) 2018 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtQml module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "config.h" +#include "yarr/YarrUnicodeProperties.h" +#include "qchar.h" + +#include "yarr/Yarr.h" +#include "yarr/YarrPattern.h" + +using namespace WTF; + +namespace JSC { namespace Yarr { + +std::optional<BuiltInCharacterClassID> unicodeMatchPropertyValue(WTF::String unicodePropertyName, WTF::String unicodePropertyValue) +{ + Q_UNUSED(unicodePropertyName); + Q_UNUSED(unicodePropertyValue); + return std::nullopt; +} + +std::optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String unicodePropertyValue) +{ + Q_UNUSED(unicodePropertyValue); + return std::nullopt; +} + +std::unique_ptr<CharacterClass> createUnicodeCharacterClassFor(BuiltInCharacterClassID unicodeClassID) +{ + Q_UNUSED(unicodeClassID); + return nullptr; +} + +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/wtf/Assertions.h b/src/3rdparty/masm/wtf/Assertions.h index 491e434498..be25d43826 100644 --- a/src/3rdparty/masm/wtf/Assertions.h +++ b/src/3rdparty/masm/wtf/Assertions.h @@ -256,7 +256,7 @@ inline void assertUnused(T& x) { (void)x; } (void)0) #define ASSERT_NOT_REACHED() do { \ - WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, 0); \ + WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, NULL); \ CRASH(); \ } while (0) diff --git a/src/3rdparty/masm/wtf/FilePrintStream.cpp b/src/3rdparty/masm/wtf/FilePrintStream.cpp index 45f1565f46..28714ecb6f 100644 --- a/src/3rdparty/masm/wtf/FilePrintStream.cpp +++ b/src/3rdparty/masm/wtf/FilePrintStream.cpp @@ -38,17 +38,16 @@ FilePrintStream::~FilePrintStream() { if (m_adoptionMode == Borrow) return; - if (m_file) - fclose(m_file); + fclose(m_file); } -PassOwnPtr<FilePrintStream> FilePrintStream::open(const char* filename, const char* mode) +std::unique_ptr<FilePrintStream> FilePrintStream::open(const char* filename, const char* mode) { FILE* file = fopen(filename, mode); if (!file) - return 
PassOwnPtr<FilePrintStream>(); - - return adoptPtr(new FilePrintStream(file)); + return nullptr; + + return std::make_unique<FilePrintStream>(file); } void FilePrintStream::vprintf(const char* format, va_list argList) diff --git a/src/3rdparty/masm/wtf/FilePrintStream.h b/src/3rdparty/masm/wtf/FilePrintStream.h index bdeab4c479..f32ca49dcb 100644 --- a/src/3rdparty/masm/wtf/FilePrintStream.h +++ b/src/3rdparty/masm/wtf/FilePrintStream.h @@ -27,7 +27,6 @@ #define FilePrintStream_h #include <stdio.h> -#include <wtf/PassOwnPtr.h> #include <wtf/PrintStream.h> namespace WTF { @@ -40,14 +39,14 @@ public: }; FilePrintStream(FILE*, AdoptionMode = Adopt); - virtual ~FilePrintStream(); + virtual ~FilePrintStream() override; - static PassOwnPtr<FilePrintStream> open(const char* filename, const char* mode); + WTF_EXPORT_PRIVATE static std::unique_ptr<FilePrintStream> open(const char* filename, const char* mode); FILE* file() { return m_file; } - void vprintf(const char* format, va_list) WTF_ATTRIBUTE_PRINTF(2, 0); - void flush(); + void vprintf(const char* format, va_list) override WTF_ATTRIBUTE_PRINTF(2, 0); + void flush() override; private: FILE* m_file; diff --git a/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp b/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp index 0c902c7172..3b2a73a39a 100644 --- a/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp +++ b/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp @@ -31,13 +31,82 @@ #include <cstdlib> #include "PageAllocation.h" +#include <dlfcn.h> #include <errno.h> #include <sys/mman.h> #include <wtf/Assertions.h> #include <wtf/UnusedParam.h> +#if OS(LINUX) +#include <sys/syscall.h> +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif +#endif + +#if defined(__ANDROID__) && defined(SYS_memfd_create) + // On Android it's been observed that permissions of memory mappings + // backed by a memfd could not be changed via mprotect for no obvious + // reason. 
+# undef SYS_memfd_create +#endif + namespace WTF { +#ifdef SYS_memfd_create +static int memfdForUsage(size_t bytes, OSAllocator::Usage usage) +{ + const char *type = "unknown-usage:"; + switch (usage) { + case OSAllocator::UnknownUsage: + break; + case OSAllocator::FastMallocPages: + type = "fastmalloc:"; + break; + case OSAllocator::JSGCHeapPages: + type = "JSGCHeap:"; + break; + case OSAllocator::JSVMStackPages: + type = "JSVMStack:"; + break; + case OSAllocator::JSJITCodePages: + type = "JITCode:"; + break; + } + + // try to get our own library name by giving dladdr a pointer pointing to + // something we know to be in it (using a pointer to string data) + static const char *libname = [=]() { + Dl_info info; + if (dladdr(type, &info) == 0) + info.dli_fname = nullptr; + return info.dli_fname; + }(); + + char buf[PATH_MAX]; + strcpy(buf, type); + if (libname) + strcat(buf, libname); + else + strcat(buf, "QtQml"); + + int fd = syscall(SYS_memfd_create, buf, MFD_CLOEXEC); + if (fd != -1) { + if (ftruncate(fd, bytes) == 0) + return fd; + } + close(fd); + return -1; +} +#elif OS(LINUX) +static int memfdForUsage(size_t bytes, OSAllocator::Usage usage) +{ + UNUSED_PARAM(bytes); + UNUSED_PARAM(usage); + return -1; +} +#endif + void* OSAllocator::reserveUncommitted(size_t bytes, Usage usage, bool writable, bool executable) { #if OS(QNX) @@ -46,14 +115,18 @@ void* OSAllocator::reserveUncommitted(size_t bytes, Usage usage, bool writable, if (result == MAP_FAILED) CRASH(); #elif OS(LINUX) - UNUSED_PARAM(usage); UNUSED_PARAM(writable); UNUSED_PARAM(executable); + int fd = memfdForUsage(bytes, usage); - void* result = mmap(0, bytes, PROT_NONE, MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0); + void* result = mmap(0, bytes, PROT_NONE, MAP_NORESERVE | MAP_PRIVATE | + (fd == -1 ? 
MAP_ANON : 0), fd, 0); if (result == MAP_FAILED) CRASH(); madvise(result, bytes, MADV_DONTNEED); + + if (fd != -1) + close(fd); #else void* result = reserveAndCommit(bytes, usage, writable, executable); #if HAVE(MADV_FREE_REUSE) @@ -83,6 +156,10 @@ void* OSAllocator::reserveAndCommit(size_t bytes, Usage usage, bool writable, bo #if OS(DARWIN) int fd = usage; +#elif OS(LINUX) + int fd = memfdForUsage(bytes, usage); + if (fd != -1) + flags &= ~MAP_ANON; #else UNUSED_PARAM(usage); int fd = -1; @@ -126,6 +203,12 @@ void* OSAllocator::reserveAndCommit(size_t bytes, Usage usage, bool writable, bo mmap(result, pageSize(), PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, fd, 0); mmap(static_cast<char*>(result) + bytes - pageSize(), pageSize(), PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, fd, 0); } + +#if OS(LINUX) + if (fd != -1) + close(fd); +#endif + return result; } diff --git a/src/3rdparty/masm/wtf/Platform.h b/src/3rdparty/masm/wtf/Platform.h index 5905f42f45..d5f69927db 100644 --- a/src/3rdparty/masm/wtf/Platform.h +++ b/src/3rdparty/masm/wtf/Platform.h @@ -2,6 +2,7 @@ * Copyright (C) 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. * Copyright (C) 2007-2009 Torch Mobile, Inc. * Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved. + * Copyright (C) 2018 The Qt Company Ltd. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -740,10 +741,6 @@ #define ENABLE_JIT 0 #endif -#if !defined(ENABLE_JIT) && CPU(SH4) && PLATFORM(QT) -#define ENABLE_JIT 1 -#endif - /* The JIT is enabled by default on all x86, x86-64, ARM & MIPS platforms. */ #if !defined(ENABLE_JIT) \ && (CPU(X86) || CPU(X86_64) || CPU(ARM) || CPU(MIPS) || CPU(ARM64)) \ @@ -1050,4 +1047,11 @@ #define WTF_USE_CONTENT_FILTERING 1 #endif +#if ENABLE(YARR_JIT) +#if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS)) +/* Enable JIT'ing Regular Expressions that have nested parenthesis. 
*/ +#define ENABLE_YARR_JIT_ALL_PARENS_EXPRESSIONS 1 +#endif +#endif + #endif /* WTF_Platform_h */ diff --git a/src/3rdparty/masm/wtf/PrintStream.h b/src/3rdparty/masm/wtf/PrintStream.h index 6fcf9c1567..4372288aff 100644 --- a/src/3rdparty/masm/wtf/PrintStream.h +++ b/src/3rdparty/masm/wtf/PrintStream.h @@ -206,6 +206,10 @@ public: print(value12); print(value13); } + + void println(); + template<typename ...Types> + void println(Types... args); }; WTF_EXPORT_PRIVATE void printInternal(PrintStream&, const char*); @@ -227,6 +231,19 @@ void printInternal(PrintStream& out, const T& value) value.dump(out); } +inline +void PrintStream::println() +{ + print("\n"); +} + +template<typename ...Types> +void PrintStream::println(Types... args) +{ + print(args...); + print("\n"); +} + #define MAKE_PRINT_ADAPTOR(Name, Type, function) \ class Name { \ public: \ diff --git a/src/3rdparty/masm/wtf/StdLibExtras.h b/src/3rdparty/masm/wtf/StdLibExtras.h index f0d792ed52..421712c349 100644 --- a/src/3rdparty/masm/wtf/StdLibExtras.h +++ b/src/3rdparty/masm/wtf/StdLibExtras.h @@ -28,6 +28,9 @@ #include <wtf/Assertions.h> #include <wtf/CheckedArithmetic.h> +#include <wtf/Platform.h> +#include <memory> +#include <qglobal.h> // Use these to declare and define a static local variable (static T;) so that // it is leaked so that its destructors are not called at exit. 
Using this @@ -71,6 +74,8 @@ #define STRINGIZE(exp) #exp #define STRINGIZE_VALUE_OF(exp) STRINGIZE(exp) +#define FALLTHROUGH Q_FALLTHROUGH() + /* * The reinterpret_cast<Type1*>([pointer to Type2]) expressions - where * sizeof(Type1) > sizeof(Type2) - cause the following warning on ARM with GCC: diff --git a/src/3rdparty/masm/wtf/VMTags.h b/src/3rdparty/masm/wtf/VMTags.h index 117bc3721e..af5352e471 100644 --- a/src/3rdparty/masm/wtf/VMTags.h +++ b/src/3rdparty/masm/wtf/VMTags.h @@ -62,6 +62,14 @@ #define VM_TAG_FOR_WEBCORE_PURGEABLE_MEMORY VM_MAKE_TAG(69) #endif // defined(VM_MEMORY_WEBCORE_PURGEABLE_BUFFERS) +#elif OS(LINUX) + +#define VM_TAG_FOR_TCMALLOC_MEMORY 0 +#define VM_TAG_FOR_COLLECTOR_MEMORY 1 +#define VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY 2 +#define VM_TAG_FOR_REGISTERFILE_MEMORY 3 +#define VM_TAG_FOR_WEBCORE_PURGEABLE_MEMORY 4 + #else // OS(DARWIN) #define VM_TAG_FOR_TCMALLOC_MEMORY -1 diff --git a/src/3rdparty/masm/yarr/Yarr.h b/src/3rdparty/masm/yarr/Yarr.h index d393e9fa90..ccf78f9880 100644 --- a/src/3rdparty/masm/yarr/Yarr.h +++ b/src/3rdparty/masm/yarr/Yarr.h @@ -25,25 +25,25 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef Yarr_h -#define Yarr_h +#pragma once -#include "YarrInterpreter.h" -#include "YarrPattern.h" +#include <limits.h> +#include "YarrErrorCode.h" namespace JSC { namespace Yarr { -#define YarrStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. -#define YarrStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoPatternCharacter 2 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoCharacterClass 2 // Only for !fixed quantifiers. #define YarrStackSpaceForBackTrackInfoBackReference 2 #define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative. 
#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1 -#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoParenthesesOnce 2 #define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1 -#define YarrStackSpaceForBackTrackInfoParentheses 2 +#define YarrStackSpaceForBackTrackInfoParentheses 4 +#define YarrStackSpaceForDotStarEnclosure 1 static const unsigned quantifyInfinite = UINT_MAX; -static const unsigned offsetNoMatch = (unsigned)-1; +static const unsigned offsetNoMatch = std::numeric_limits<unsigned>::max(); // The below limit restricts the number of "recursive" match calls in order to // avoid spending exponential time on complex regular expressions. @@ -53,9 +53,10 @@ enum JSRegExpResult { JSRegExpMatch = 1, JSRegExpNoMatch = 0, JSRegExpErrorNoMatch = -1, - JSRegExpErrorHitLimit = -2, - JSRegExpErrorNoMemory = -3, - JSRegExpErrorInternal = -4 + JSRegExpJITCodeFailure = -2, + JSRegExpErrorHitLimit = -3, + JSRegExpErrorNoMemory = -4, + JSRegExpErrorInternal = -5, }; enum YarrCharSize { @@ -63,7 +64,14 @@ enum YarrCharSize { Char16 }; -} } // namespace JSC::Yarr +enum class BuiltInCharacterClassID : unsigned { + DigitClassID, + SpaceClassID, + WordClassID, + DotClassID, + BaseUnicodePropertyID +}; -#endif // Yarr_h +struct BytecodePattern; +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrCanonicalize.h b/src/3rdparty/masm/yarr/YarrCanonicalize.h new file mode 100644 index 0000000000..fb5e0231ac --- /dev/null +++ b/src/3rdparty/masm/yarr/YarrCanonicalize.h @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2012-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <stdint.h> +#include <unicode/utypes.h> + +namespace JSC { namespace Yarr { + +// This set of data provides information for each UCS2 code point as to the set of code points +// that it should match under the ES6 case insensitive RegExp matching rules, specified in 21.2.2.8.2. +// The non-Unicode tables are autogenerated using YarrCanonicalize.js into YarrCanonicalize.cpp. +// The Unicode tables are autogenerated using the python script generateYarrCanonicalizeUnicode +// which creates YarrCanonicalizeUnicode.cpp. +enum UCS2CanonicalizationType { + CanonicalizeUnique, // No canonically equal values, e.g. 0x0. + CanonicalizeSet, // Value indicates a set in characterSetInfo. + CanonicalizeRangeLo, // Value is positive delta to pair, E.g. 0x41 has value 0x20, -> 0x61. + CanonicalizeRangeHi, // Value is positive delta to pair, E.g. 0x61 has value 0x20, -> 0x41. + CanonicalizeAlternatingAligned, // Aligned consequtive pair, e.g. 0x1f4,0x1f5. 
+ CanonicalizeAlternatingUnaligned, // Unaligned consequtive pair, e.g. 0x241,0x242. +}; +struct CanonicalizationRange { + UChar32 begin; + UChar32 end; + UChar32 value; + UCS2CanonicalizationType type; +}; + +extern const size_t UCS2_CANONICALIZATION_RANGES; +extern const UChar32* const ucs2CharacterSetInfo[]; +extern const CanonicalizationRange ucs2RangeInfo[]; + +extern const size_t UNICODE_CANONICALIZATION_RANGES; +extern const UChar32* const unicodeCharacterSetInfo[]; +extern const CanonicalizationRange unicodeRangeInfo[]; + +enum class CanonicalMode { UCS2, Unicode }; + +inline const UChar32* canonicalCharacterSetInfo(unsigned index, CanonicalMode canonicalMode) +{ + const UChar32* const* rangeInfo = canonicalMode == CanonicalMode::UCS2 ? ucs2CharacterSetInfo : unicodeCharacterSetInfo; + return rangeInfo[index]; +} + +// This searches in log2 time over ~400-600 entries, so should typically result in 9 compares. +inline const CanonicalizationRange* canonicalRangeInfoFor(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2) +{ + const CanonicalizationRange* info = canonicalMode == CanonicalMode::UCS2 ? ucs2RangeInfo : unicodeRangeInfo; + size_t entries = canonicalMode == CanonicalMode::UCS2 ? UCS2_CANONICALIZATION_RANGES : UNICODE_CANONICALIZATION_RANGES; + + while (true) { + size_t candidate = entries >> 1; + const CanonicalizationRange* candidateInfo = info + candidate; + if (ch < candidateInfo->begin) + entries = candidate; + else if (ch <= candidateInfo->end) + return candidateInfo; + else { + info = candidateInfo + 1; + entries -= (candidate + 1); + } + } +} + +// Should only be called for characters that have one canonically matching value. 
+inline UChar32 getCanonicalPair(const CanonicalizationRange* info, UChar32 ch) +{ + ASSERT(ch >= info->begin && ch <= info->end); + switch (info->type) { + case CanonicalizeRangeLo: + return ch + info->value; + case CanonicalizeRangeHi: + return ch - info->value; + case CanonicalizeAlternatingAligned: + return ch ^ 1; + case CanonicalizeAlternatingUnaligned: + return ((ch - 1) ^ 1) + 1; + default: + RELEASE_ASSERT_NOT_REACHED(); + } + RELEASE_ASSERT_NOT_REACHED(); + return 0; +} + +// Returns true if no other UCS2 codepoint can match this value. +inline bool isCanonicallyUnique(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2) +{ + return canonicalRangeInfoFor(ch, canonicalMode)->type == CanonicalizeUnique; +} + +// Returns true if values are equal, under the canonicalization rules. +inline bool areCanonicallyEquivalent(UChar32 a, UChar32 b, CanonicalMode canonicalMode = CanonicalMode::UCS2) +{ + const CanonicalizationRange* info = canonicalRangeInfoFor(a, canonicalMode); + switch (info->type) { + case CanonicalizeUnique: + return a == b; + case CanonicalizeSet: { + for (const UChar32* set = canonicalCharacterSetInfo(info->value, canonicalMode); (a = *set); ++set) { + if (a == b) + return true; + } + return false; + } + case CanonicalizeRangeLo: + return (a == b) || (a + info->value == b); + case CanonicalizeRangeHi: + return (a == b) || (a - info->value == b); + case CanonicalizeAlternatingAligned: + return (a | 1) == (b | 1); + case CanonicalizeAlternatingUnaligned: + return ((a - 1) | 1) == ((b - 1) | 1); + } + + RELEASE_ASSERT_NOT_REACHED(); + return false; +} + +} } // JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp index 7bb3d08eb5..d91c771590 100644 --- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp +++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 Apple Inc. All rights reserved. 
+ * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,33 +23,31 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js +// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js #include "config.h" -#include "YarrCanonicalizeUCS2.h" +#include "YarrCanonicalize.h" namespace JSC { namespace Yarr { -#include <stdint.h> - -uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 }; -uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 }; -uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 }; -uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 }; -uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 }; -uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 }; -uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 }; -uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 }; -uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 }; -uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 }; -uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 }; -uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 }; -uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 }; -uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 }; -uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 }; +const UChar32 ucs2CharacterSet0[] = { 0x01c4, 0x01c5, 0x01c6, 0 }; +const UChar32 ucs2CharacterSet1[] = { 0x01c7, 0x01c8, 0x01c9, 0 }; +const UChar32 ucs2CharacterSet2[] = { 0x01ca, 0x01cb, 0x01cc, 0 }; +const UChar32 ucs2CharacterSet3[] = { 0x01f1, 0x01f2, 0x01f3, 0 }; +const UChar32 ucs2CharacterSet4[] = { 0x0392, 0x03b2, 0x03d0, 0 }; +const UChar32 ucs2CharacterSet5[] = { 0x0395, 0x03b5, 0x03f5, 0 }; 
+const UChar32 ucs2CharacterSet6[] = { 0x0398, 0x03b8, 0x03d1, 0 }; +const UChar32 ucs2CharacterSet7[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 }; +const UChar32 ucs2CharacterSet8[] = { 0x039a, 0x03ba, 0x03f0, 0 }; +const UChar32 ucs2CharacterSet9[] = { 0x00b5, 0x039c, 0x03bc, 0 }; +const UChar32 ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 }; +const UChar32 ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 }; +const UChar32 ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 }; +const UChar32 ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 }; +const UChar32 ucs2CharacterSet14[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; static const size_t UCS2_CANONICALIZATION_SETS = 15; -uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = { +const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet0, ucs2CharacterSet1, ucs2CharacterSet2, @@ -67,396 +65,399 @@ uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet14, }; -const size_t UCS2_CANONICALIZATION_RANGES = 364; -UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = { - { 0x0000u, 0x0040u, 0x0000u, CanonicalizeUnique }, - { 0x0041u, 0x005au, 0x0020u, CanonicalizeRangeLo }, - { 0x005bu, 0x0060u, 0x0000u, CanonicalizeUnique }, - { 0x0061u, 0x007au, 0x0020u, CanonicalizeRangeHi }, - { 0x007bu, 0x00b4u, 0x0000u, CanonicalizeUnique }, - { 0x00b5u, 0x00b5u, 0x0009u, CanonicalizeSet }, - { 0x00b6u, 0x00bfu, 0x0000u, CanonicalizeUnique }, - { 0x00c0u, 0x00d6u, 0x0020u, CanonicalizeRangeLo }, - { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeUnique }, - { 0x00d8u, 0x00deu, 0x0020u, CanonicalizeRangeLo }, - { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeUnique }, - { 0x00e0u, 0x00f6u, 0x0020u, CanonicalizeRangeHi }, - { 0x00f7u, 0x00f7u, 0x0000u, CanonicalizeUnique }, - { 0x00f8u, 0x00feu, 0x0020u, CanonicalizeRangeHi }, - { 0x00ffu, 0x00ffu, 0x0079u, CanonicalizeRangeLo }, - { 0x0100u, 0x012fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0130u, 0x0131u, 0x0000u, 
CanonicalizeUnique }, - { 0x0132u, 0x0137u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0138u, 0x0138u, 0x0000u, CanonicalizeUnique }, - { 0x0139u, 0x0148u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x0149u, 0x0149u, 0x0000u, CanonicalizeUnique }, - { 0x014au, 0x0177u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0178u, 0x0178u, 0x0079u, CanonicalizeRangeHi }, - { 0x0179u, 0x017eu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x017fu, 0x017fu, 0x0000u, CanonicalizeUnique }, - { 0x0180u, 0x0180u, 0x00c3u, CanonicalizeRangeLo }, - { 0x0181u, 0x0181u, 0x00d2u, CanonicalizeRangeLo }, - { 0x0182u, 0x0185u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0186u, 0x0186u, 0x00ceu, CanonicalizeRangeLo }, - { 0x0187u, 0x0188u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x0189u, 0x018au, 0x00cdu, CanonicalizeRangeLo }, - { 0x018bu, 0x018cu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x018du, 0x018du, 0x0000u, CanonicalizeUnique }, - { 0x018eu, 0x018eu, 0x004fu, CanonicalizeRangeLo }, - { 0x018fu, 0x018fu, 0x00cau, CanonicalizeRangeLo }, - { 0x0190u, 0x0190u, 0x00cbu, CanonicalizeRangeLo }, - { 0x0191u, 0x0192u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x0193u, 0x0193u, 0x00cdu, CanonicalizeRangeLo }, - { 0x0194u, 0x0194u, 0x00cfu, CanonicalizeRangeLo }, - { 0x0195u, 0x0195u, 0x0061u, CanonicalizeRangeLo }, - { 0x0196u, 0x0196u, 0x00d3u, CanonicalizeRangeLo }, - { 0x0197u, 0x0197u, 0x00d1u, CanonicalizeRangeLo }, - { 0x0198u, 0x0199u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x019au, 0x019au, 0x00a3u, CanonicalizeRangeLo }, - { 0x019bu, 0x019bu, 0x0000u, CanonicalizeUnique }, - { 0x019cu, 0x019cu, 0x00d3u, CanonicalizeRangeLo }, - { 0x019du, 0x019du, 0x00d5u, CanonicalizeRangeLo }, - { 0x019eu, 0x019eu, 0x0082u, CanonicalizeRangeLo }, - { 0x019fu, 0x019fu, 0x00d6u, CanonicalizeRangeLo }, - { 0x01a0u, 0x01a5u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01a6u, 0x01a6u, 0x00dau, CanonicalizeRangeLo }, - { 0x01a7u, 0x01a8u, 
0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x01a9u, 0x01a9u, 0x00dau, CanonicalizeRangeLo }, - { 0x01aau, 0x01abu, 0x0000u, CanonicalizeUnique }, - { 0x01acu, 0x01adu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01aeu, 0x01aeu, 0x00dau, CanonicalizeRangeLo }, - { 0x01afu, 0x01b0u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x01b1u, 0x01b2u, 0x00d9u, CanonicalizeRangeLo }, - { 0x01b3u, 0x01b6u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x01b7u, 0x01b7u, 0x00dbu, CanonicalizeRangeLo }, - { 0x01b8u, 0x01b9u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01bau, 0x01bbu, 0x0000u, CanonicalizeUnique }, - { 0x01bcu, 0x01bdu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01beu, 0x01beu, 0x0000u, CanonicalizeUnique }, - { 0x01bfu, 0x01bfu, 0x0038u, CanonicalizeRangeLo }, - { 0x01c0u, 0x01c3u, 0x0000u, CanonicalizeUnique }, - { 0x01c4u, 0x01c6u, 0x0000u, CanonicalizeSet }, - { 0x01c7u, 0x01c9u, 0x0001u, CanonicalizeSet }, - { 0x01cau, 0x01ccu, 0x0002u, CanonicalizeSet }, - { 0x01cdu, 0x01dcu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x01ddu, 0x01ddu, 0x004fu, CanonicalizeRangeHi }, - { 0x01deu, 0x01efu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01f0u, 0x01f0u, 0x0000u, CanonicalizeUnique }, - { 0x01f1u, 0x01f3u, 0x0003u, CanonicalizeSet }, - { 0x01f4u, 0x01f5u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x01f6u, 0x01f6u, 0x0061u, CanonicalizeRangeHi }, - { 0x01f7u, 0x01f7u, 0x0038u, CanonicalizeRangeHi }, - { 0x01f8u, 0x021fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0220u, 0x0220u, 0x0082u, CanonicalizeRangeHi }, - { 0x0221u, 0x0221u, 0x0000u, CanonicalizeUnique }, - { 0x0222u, 0x0233u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0234u, 0x0239u, 0x0000u, CanonicalizeUnique }, - { 0x023au, 0x023au, 0x2a2bu, CanonicalizeRangeLo }, - { 0x023bu, 0x023cu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x023du, 0x023du, 0x00a3u, CanonicalizeRangeHi }, - { 0x023eu, 0x023eu, 0x2a28u, CanonicalizeRangeLo }, - { 0x023fu, 
0x0240u, 0x2a3fu, CanonicalizeRangeLo }, - { 0x0241u, 0x0242u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x0243u, 0x0243u, 0x00c3u, CanonicalizeRangeHi }, - { 0x0244u, 0x0244u, 0x0045u, CanonicalizeRangeLo }, - { 0x0245u, 0x0245u, 0x0047u, CanonicalizeRangeLo }, - { 0x0246u, 0x024fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0250u, 0x0250u, 0x2a1fu, CanonicalizeRangeLo }, - { 0x0251u, 0x0251u, 0x2a1cu, CanonicalizeRangeLo }, - { 0x0252u, 0x0252u, 0x2a1eu, CanonicalizeRangeLo }, - { 0x0253u, 0x0253u, 0x00d2u, CanonicalizeRangeHi }, - { 0x0254u, 0x0254u, 0x00ceu, CanonicalizeRangeHi }, - { 0x0255u, 0x0255u, 0x0000u, CanonicalizeUnique }, - { 0x0256u, 0x0257u, 0x00cdu, CanonicalizeRangeHi }, - { 0x0258u, 0x0258u, 0x0000u, CanonicalizeUnique }, - { 0x0259u, 0x0259u, 0x00cau, CanonicalizeRangeHi }, - { 0x025au, 0x025au, 0x0000u, CanonicalizeUnique }, - { 0x025bu, 0x025bu, 0x00cbu, CanonicalizeRangeHi }, - { 0x025cu, 0x025fu, 0x0000u, CanonicalizeUnique }, - { 0x0260u, 0x0260u, 0x00cdu, CanonicalizeRangeHi }, - { 0x0261u, 0x0262u, 0x0000u, CanonicalizeUnique }, - { 0x0263u, 0x0263u, 0x00cfu, CanonicalizeRangeHi }, - { 0x0264u, 0x0264u, 0x0000u, CanonicalizeUnique }, - { 0x0265u, 0x0265u, 0xa528u, CanonicalizeRangeLo }, - { 0x0266u, 0x0267u, 0x0000u, CanonicalizeUnique }, - { 0x0268u, 0x0268u, 0x00d1u, CanonicalizeRangeHi }, - { 0x0269u, 0x0269u, 0x00d3u, CanonicalizeRangeHi }, - { 0x026au, 0x026au, 0x0000u, CanonicalizeUnique }, - { 0x026bu, 0x026bu, 0x29f7u, CanonicalizeRangeLo }, - { 0x026cu, 0x026eu, 0x0000u, CanonicalizeUnique }, - { 0x026fu, 0x026fu, 0x00d3u, CanonicalizeRangeHi }, - { 0x0270u, 0x0270u, 0x0000u, CanonicalizeUnique }, - { 0x0271u, 0x0271u, 0x29fdu, CanonicalizeRangeLo }, - { 0x0272u, 0x0272u, 0x00d5u, CanonicalizeRangeHi }, - { 0x0273u, 0x0274u, 0x0000u, CanonicalizeUnique }, - { 0x0275u, 0x0275u, 0x00d6u, CanonicalizeRangeHi }, - { 0x0276u, 0x027cu, 0x0000u, CanonicalizeUnique }, - { 0x027du, 0x027du, 0x29e7u, CanonicalizeRangeLo }, - 
{ 0x027eu, 0x027fu, 0x0000u, CanonicalizeUnique }, - { 0x0280u, 0x0280u, 0x00dau, CanonicalizeRangeHi }, - { 0x0281u, 0x0282u, 0x0000u, CanonicalizeUnique }, - { 0x0283u, 0x0283u, 0x00dau, CanonicalizeRangeHi }, - { 0x0284u, 0x0287u, 0x0000u, CanonicalizeUnique }, - { 0x0288u, 0x0288u, 0x00dau, CanonicalizeRangeHi }, - { 0x0289u, 0x0289u, 0x0045u, CanonicalizeRangeHi }, - { 0x028au, 0x028bu, 0x00d9u, CanonicalizeRangeHi }, - { 0x028cu, 0x028cu, 0x0047u, CanonicalizeRangeHi }, - { 0x028du, 0x0291u, 0x0000u, CanonicalizeUnique }, - { 0x0292u, 0x0292u, 0x00dbu, CanonicalizeRangeHi }, - { 0x0293u, 0x0344u, 0x0000u, CanonicalizeUnique }, - { 0x0345u, 0x0345u, 0x0007u, CanonicalizeSet }, - { 0x0346u, 0x036fu, 0x0000u, CanonicalizeUnique }, - { 0x0370u, 0x0373u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0374u, 0x0375u, 0x0000u, CanonicalizeUnique }, - { 0x0376u, 0x0377u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0378u, 0x037au, 0x0000u, CanonicalizeUnique }, - { 0x037bu, 0x037du, 0x0082u, CanonicalizeRangeLo }, - { 0x037eu, 0x0385u, 0x0000u, CanonicalizeUnique }, - { 0x0386u, 0x0386u, 0x0026u, CanonicalizeRangeLo }, - { 0x0387u, 0x0387u, 0x0000u, CanonicalizeUnique }, - { 0x0388u, 0x038au, 0x0025u, CanonicalizeRangeLo }, - { 0x038bu, 0x038bu, 0x0000u, CanonicalizeUnique }, - { 0x038cu, 0x038cu, 0x0040u, CanonicalizeRangeLo }, - { 0x038du, 0x038du, 0x0000u, CanonicalizeUnique }, - { 0x038eu, 0x038fu, 0x003fu, CanonicalizeRangeLo }, - { 0x0390u, 0x0390u, 0x0000u, CanonicalizeUnique }, - { 0x0391u, 0x0391u, 0x0020u, CanonicalizeRangeLo }, - { 0x0392u, 0x0392u, 0x0004u, CanonicalizeSet }, - { 0x0393u, 0x0394u, 0x0020u, CanonicalizeRangeLo }, - { 0x0395u, 0x0395u, 0x0005u, CanonicalizeSet }, - { 0x0396u, 0x0397u, 0x0020u, CanonicalizeRangeLo }, - { 0x0398u, 0x0398u, 0x0006u, CanonicalizeSet }, - { 0x0399u, 0x0399u, 0x0007u, CanonicalizeSet }, - { 0x039au, 0x039au, 0x0008u, CanonicalizeSet }, - { 0x039bu, 0x039bu, 0x0020u, CanonicalizeRangeLo }, - { 0x039cu, 
0x039cu, 0x0009u, CanonicalizeSet }, - { 0x039du, 0x039fu, 0x0020u, CanonicalizeRangeLo }, - { 0x03a0u, 0x03a0u, 0x000au, CanonicalizeSet }, - { 0x03a1u, 0x03a1u, 0x000bu, CanonicalizeSet }, - { 0x03a2u, 0x03a2u, 0x0000u, CanonicalizeUnique }, - { 0x03a3u, 0x03a3u, 0x000cu, CanonicalizeSet }, - { 0x03a4u, 0x03a5u, 0x0020u, CanonicalizeRangeLo }, - { 0x03a6u, 0x03a6u, 0x000du, CanonicalizeSet }, - { 0x03a7u, 0x03abu, 0x0020u, CanonicalizeRangeLo }, - { 0x03acu, 0x03acu, 0x0026u, CanonicalizeRangeHi }, - { 0x03adu, 0x03afu, 0x0025u, CanonicalizeRangeHi }, - { 0x03b0u, 0x03b0u, 0x0000u, CanonicalizeUnique }, - { 0x03b1u, 0x03b1u, 0x0020u, CanonicalizeRangeHi }, - { 0x03b2u, 0x03b2u, 0x0004u, CanonicalizeSet }, - { 0x03b3u, 0x03b4u, 0x0020u, CanonicalizeRangeHi }, - { 0x03b5u, 0x03b5u, 0x0005u, CanonicalizeSet }, - { 0x03b6u, 0x03b7u, 0x0020u, CanonicalizeRangeHi }, - { 0x03b8u, 0x03b8u, 0x0006u, CanonicalizeSet }, - { 0x03b9u, 0x03b9u, 0x0007u, CanonicalizeSet }, - { 0x03bau, 0x03bau, 0x0008u, CanonicalizeSet }, - { 0x03bbu, 0x03bbu, 0x0020u, CanonicalizeRangeHi }, - { 0x03bcu, 0x03bcu, 0x0009u, CanonicalizeSet }, - { 0x03bdu, 0x03bfu, 0x0020u, CanonicalizeRangeHi }, - { 0x03c0u, 0x03c0u, 0x000au, CanonicalizeSet }, - { 0x03c1u, 0x03c1u, 0x000bu, CanonicalizeSet }, - { 0x03c2u, 0x03c3u, 0x000cu, CanonicalizeSet }, - { 0x03c4u, 0x03c5u, 0x0020u, CanonicalizeRangeHi }, - { 0x03c6u, 0x03c6u, 0x000du, CanonicalizeSet }, - { 0x03c7u, 0x03cbu, 0x0020u, CanonicalizeRangeHi }, - { 0x03ccu, 0x03ccu, 0x0040u, CanonicalizeRangeHi }, - { 0x03cdu, 0x03ceu, 0x003fu, CanonicalizeRangeHi }, - { 0x03cfu, 0x03cfu, 0x0008u, CanonicalizeRangeLo }, - { 0x03d0u, 0x03d0u, 0x0004u, CanonicalizeSet }, - { 0x03d1u, 0x03d1u, 0x0006u, CanonicalizeSet }, - { 0x03d2u, 0x03d4u, 0x0000u, CanonicalizeUnique }, - { 0x03d5u, 0x03d5u, 0x000du, CanonicalizeSet }, - { 0x03d6u, 0x03d6u, 0x000au, CanonicalizeSet }, - { 0x03d7u, 0x03d7u, 0x0008u, CanonicalizeRangeHi }, - { 0x03d8u, 0x03efu, 0x0000u, 
CanonicalizeAlternatingAligned }, - { 0x03f0u, 0x03f0u, 0x0008u, CanonicalizeSet }, - { 0x03f1u, 0x03f1u, 0x000bu, CanonicalizeSet }, - { 0x03f2u, 0x03f2u, 0x0007u, CanonicalizeRangeLo }, - { 0x03f3u, 0x03f4u, 0x0000u, CanonicalizeUnique }, - { 0x03f5u, 0x03f5u, 0x0005u, CanonicalizeSet }, - { 0x03f6u, 0x03f6u, 0x0000u, CanonicalizeUnique }, - { 0x03f7u, 0x03f8u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x03f9u, 0x03f9u, 0x0007u, CanonicalizeRangeHi }, - { 0x03fau, 0x03fbu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x03fcu, 0x03fcu, 0x0000u, CanonicalizeUnique }, - { 0x03fdu, 0x03ffu, 0x0082u, CanonicalizeRangeHi }, - { 0x0400u, 0x040fu, 0x0050u, CanonicalizeRangeLo }, - { 0x0410u, 0x042fu, 0x0020u, CanonicalizeRangeLo }, - { 0x0430u, 0x044fu, 0x0020u, CanonicalizeRangeHi }, - { 0x0450u, 0x045fu, 0x0050u, CanonicalizeRangeHi }, - { 0x0460u, 0x0481u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0482u, 0x0489u, 0x0000u, CanonicalizeUnique }, - { 0x048au, 0x04bfu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x04c0u, 0x04c0u, 0x000fu, CanonicalizeRangeLo }, - { 0x04c1u, 0x04ceu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x04cfu, 0x04cfu, 0x000fu, CanonicalizeRangeHi }, - { 0x04d0u, 0x0527u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x0528u, 0x0530u, 0x0000u, CanonicalizeUnique }, - { 0x0531u, 0x0556u, 0x0030u, CanonicalizeRangeLo }, - { 0x0557u, 0x0560u, 0x0000u, CanonicalizeUnique }, - { 0x0561u, 0x0586u, 0x0030u, CanonicalizeRangeHi }, - { 0x0587u, 0x109fu, 0x0000u, CanonicalizeUnique }, - { 0x10a0u, 0x10c5u, 0x1c60u, CanonicalizeRangeLo }, - { 0x10c6u, 0x1d78u, 0x0000u, CanonicalizeUnique }, - { 0x1d79u, 0x1d79u, 0x8a04u, CanonicalizeRangeLo }, - { 0x1d7au, 0x1d7cu, 0x0000u, CanonicalizeUnique }, - { 0x1d7du, 0x1d7du, 0x0ee6u, CanonicalizeRangeLo }, - { 0x1d7eu, 0x1dffu, 0x0000u, CanonicalizeUnique }, - { 0x1e00u, 0x1e5fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x1e60u, 0x1e61u, 0x000eu, CanonicalizeSet }, - { 0x1e62u, 
0x1e95u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x1e96u, 0x1e9au, 0x0000u, CanonicalizeUnique }, - { 0x1e9bu, 0x1e9bu, 0x000eu, CanonicalizeSet }, - { 0x1e9cu, 0x1e9fu, 0x0000u, CanonicalizeUnique }, - { 0x1ea0u, 0x1effu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x1f00u, 0x1f07u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f08u, 0x1f0fu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f10u, 0x1f15u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f16u, 0x1f17u, 0x0000u, CanonicalizeUnique }, - { 0x1f18u, 0x1f1du, 0x0008u, CanonicalizeRangeHi }, - { 0x1f1eu, 0x1f1fu, 0x0000u, CanonicalizeUnique }, - { 0x1f20u, 0x1f27u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f28u, 0x1f2fu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f30u, 0x1f37u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f38u, 0x1f3fu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f40u, 0x1f45u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f46u, 0x1f47u, 0x0000u, CanonicalizeUnique }, - { 0x1f48u, 0x1f4du, 0x0008u, CanonicalizeRangeHi }, - { 0x1f4eu, 0x1f50u, 0x0000u, CanonicalizeUnique }, - { 0x1f51u, 0x1f51u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f52u, 0x1f52u, 0x0000u, CanonicalizeUnique }, - { 0x1f53u, 0x1f53u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f54u, 0x1f54u, 0x0000u, CanonicalizeUnique }, - { 0x1f55u, 0x1f55u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f56u, 0x1f56u, 0x0000u, CanonicalizeUnique }, - { 0x1f57u, 0x1f57u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f58u, 0x1f58u, 0x0000u, CanonicalizeUnique }, - { 0x1f59u, 0x1f59u, 0x0008u, CanonicalizeRangeHi }, - { 0x1f5au, 0x1f5au, 0x0000u, CanonicalizeUnique }, - { 0x1f5bu, 0x1f5bu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f5cu, 0x1f5cu, 0x0000u, CanonicalizeUnique }, - { 0x1f5du, 0x1f5du, 0x0008u, CanonicalizeRangeHi }, - { 0x1f5eu, 0x1f5eu, 0x0000u, CanonicalizeUnique }, - { 0x1f5fu, 0x1f5fu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f60u, 0x1f67u, 0x0008u, CanonicalizeRangeLo }, - { 0x1f68u, 0x1f6fu, 0x0008u, CanonicalizeRangeHi }, - { 0x1f70u, 0x1f71u, 0x004au, CanonicalizeRangeLo }, - { 
0x1f72u, 0x1f75u, 0x0056u, CanonicalizeRangeLo }, - { 0x1f76u, 0x1f77u, 0x0064u, CanonicalizeRangeLo }, - { 0x1f78u, 0x1f79u, 0x0080u, CanonicalizeRangeLo }, - { 0x1f7au, 0x1f7bu, 0x0070u, CanonicalizeRangeLo }, - { 0x1f7cu, 0x1f7du, 0x007eu, CanonicalizeRangeLo }, - { 0x1f7eu, 0x1fafu, 0x0000u, CanonicalizeUnique }, - { 0x1fb0u, 0x1fb1u, 0x0008u, CanonicalizeRangeLo }, - { 0x1fb2u, 0x1fb7u, 0x0000u, CanonicalizeUnique }, - { 0x1fb8u, 0x1fb9u, 0x0008u, CanonicalizeRangeHi }, - { 0x1fbau, 0x1fbbu, 0x004au, CanonicalizeRangeHi }, - { 0x1fbcu, 0x1fbdu, 0x0000u, CanonicalizeUnique }, - { 0x1fbeu, 0x1fbeu, 0x0007u, CanonicalizeSet }, - { 0x1fbfu, 0x1fc7u, 0x0000u, CanonicalizeUnique }, - { 0x1fc8u, 0x1fcbu, 0x0056u, CanonicalizeRangeHi }, - { 0x1fccu, 0x1fcfu, 0x0000u, CanonicalizeUnique }, - { 0x1fd0u, 0x1fd1u, 0x0008u, CanonicalizeRangeLo }, - { 0x1fd2u, 0x1fd7u, 0x0000u, CanonicalizeUnique }, - { 0x1fd8u, 0x1fd9u, 0x0008u, CanonicalizeRangeHi }, - { 0x1fdau, 0x1fdbu, 0x0064u, CanonicalizeRangeHi }, - { 0x1fdcu, 0x1fdfu, 0x0000u, CanonicalizeUnique }, - { 0x1fe0u, 0x1fe1u, 0x0008u, CanonicalizeRangeLo }, - { 0x1fe2u, 0x1fe4u, 0x0000u, CanonicalizeUnique }, - { 0x1fe5u, 0x1fe5u, 0x0007u, CanonicalizeRangeLo }, - { 0x1fe6u, 0x1fe7u, 0x0000u, CanonicalizeUnique }, - { 0x1fe8u, 0x1fe9u, 0x0008u, CanonicalizeRangeHi }, - { 0x1feau, 0x1febu, 0x0070u, CanonicalizeRangeHi }, - { 0x1fecu, 0x1fecu, 0x0007u, CanonicalizeRangeHi }, - { 0x1fedu, 0x1ff7u, 0x0000u, CanonicalizeUnique }, - { 0x1ff8u, 0x1ff9u, 0x0080u, CanonicalizeRangeHi }, - { 0x1ffau, 0x1ffbu, 0x007eu, CanonicalizeRangeHi }, - { 0x1ffcu, 0x2131u, 0x0000u, CanonicalizeUnique }, - { 0x2132u, 0x2132u, 0x001cu, CanonicalizeRangeLo }, - { 0x2133u, 0x214du, 0x0000u, CanonicalizeUnique }, - { 0x214eu, 0x214eu, 0x001cu, CanonicalizeRangeHi }, - { 0x214fu, 0x215fu, 0x0000u, CanonicalizeUnique }, - { 0x2160u, 0x216fu, 0x0010u, CanonicalizeRangeLo }, - { 0x2170u, 0x217fu, 0x0010u, CanonicalizeRangeHi }, - { 0x2180u, 0x2182u, 
0x0000u, CanonicalizeUnique }, - { 0x2183u, 0x2184u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x2185u, 0x24b5u, 0x0000u, CanonicalizeUnique }, - { 0x24b6u, 0x24cfu, 0x001au, CanonicalizeRangeLo }, - { 0x24d0u, 0x24e9u, 0x001au, CanonicalizeRangeHi }, - { 0x24eau, 0x2bffu, 0x0000u, CanonicalizeUnique }, - { 0x2c00u, 0x2c2eu, 0x0030u, CanonicalizeRangeLo }, - { 0x2c2fu, 0x2c2fu, 0x0000u, CanonicalizeUnique }, - { 0x2c30u, 0x2c5eu, 0x0030u, CanonicalizeRangeHi }, - { 0x2c5fu, 0x2c5fu, 0x0000u, CanonicalizeUnique }, - { 0x2c60u, 0x2c61u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x2c62u, 0x2c62u, 0x29f7u, CanonicalizeRangeHi }, - { 0x2c63u, 0x2c63u, 0x0ee6u, CanonicalizeRangeHi }, - { 0x2c64u, 0x2c64u, 0x29e7u, CanonicalizeRangeHi }, - { 0x2c65u, 0x2c65u, 0x2a2bu, CanonicalizeRangeHi }, - { 0x2c66u, 0x2c66u, 0x2a28u, CanonicalizeRangeHi }, - { 0x2c67u, 0x2c6cu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x2c6du, 0x2c6du, 0x2a1cu, CanonicalizeRangeHi }, - { 0x2c6eu, 0x2c6eu, 0x29fdu, CanonicalizeRangeHi }, - { 0x2c6fu, 0x2c6fu, 0x2a1fu, CanonicalizeRangeHi }, - { 0x2c70u, 0x2c70u, 0x2a1eu, CanonicalizeRangeHi }, - { 0x2c71u, 0x2c71u, 0x0000u, CanonicalizeUnique }, - { 0x2c72u, 0x2c73u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x2c74u, 0x2c74u, 0x0000u, CanonicalizeUnique }, - { 0x2c75u, 0x2c76u, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x2c77u, 0x2c7du, 0x0000u, CanonicalizeUnique }, - { 0x2c7eu, 0x2c7fu, 0x2a3fu, CanonicalizeRangeHi }, - { 0x2c80u, 0x2ce3u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0x2ce4u, 0x2ceau, 0x0000u, CanonicalizeUnique }, - { 0x2cebu, 0x2ceeu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0x2cefu, 0x2cffu, 0x0000u, CanonicalizeUnique }, - { 0x2d00u, 0x2d25u, 0x1c60u, CanonicalizeRangeHi }, - { 0x2d26u, 0xa63fu, 0x0000u, CanonicalizeUnique }, - { 0xa640u, 0xa66du, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa66eu, 0xa67fu, 0x0000u, CanonicalizeUnique }, - { 0xa680u, 0xa697u, 0x0000u, 
CanonicalizeAlternatingAligned }, - { 0xa698u, 0xa721u, 0x0000u, CanonicalizeUnique }, - { 0xa722u, 0xa72fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa730u, 0xa731u, 0x0000u, CanonicalizeUnique }, - { 0xa732u, 0xa76fu, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa770u, 0xa778u, 0x0000u, CanonicalizeUnique }, - { 0xa779u, 0xa77cu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0xa77du, 0xa77du, 0x8a04u, CanonicalizeRangeHi }, - { 0xa77eu, 0xa787u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa788u, 0xa78au, 0x0000u, CanonicalizeUnique }, - { 0xa78bu, 0xa78cu, 0x0000u, CanonicalizeAlternatingUnaligned }, - { 0xa78du, 0xa78du, 0xa528u, CanonicalizeRangeHi }, - { 0xa78eu, 0xa78fu, 0x0000u, CanonicalizeUnique }, - { 0xa790u, 0xa791u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa792u, 0xa79fu, 0x0000u, CanonicalizeUnique }, - { 0xa7a0u, 0xa7a9u, 0x0000u, CanonicalizeAlternatingAligned }, - { 0xa7aau, 0xff20u, 0x0000u, CanonicalizeUnique }, - { 0xff21u, 0xff3au, 0x0020u, CanonicalizeRangeLo }, - { 0xff3bu, 0xff40u, 0x0000u, CanonicalizeUnique }, - { 0xff41u, 0xff5au, 0x0020u, CanonicalizeRangeHi }, - { 0xff5bu, 0xffffu, 0x0000u, CanonicalizeUnique }, -}; - -const size_t LATIN_CANONICALIZATION_RANGES = 20; -LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = { - { 0x0000u, 0x0040u, 0x0000u, CanonicalizeLatinSelf }, - { 0x0041u, 0x005au, 0x0000u, CanonicalizeLatinMask0x20 }, - { 0x005bu, 0x0060u, 0x0000u, CanonicalizeLatinSelf }, - { 0x0061u, 0x007au, 0x0000u, CanonicalizeLatinMask0x20 }, - { 0x007bu, 0x00bfu, 0x0000u, CanonicalizeLatinSelf }, - { 0x00c0u, 0x00d6u, 0x0000u, CanonicalizeLatinMask0x20 }, - { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeLatinSelf }, - { 0x00d8u, 0x00deu, 0x0000u, CanonicalizeLatinMask0x20 }, - { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeLatinSelf }, - { 0x00e0u, 0x00f6u, 0x0000u, CanonicalizeLatinMask0x20 }, - { 0x00f7u, 0x00f7u, 0x0000u, CanonicalizeLatinSelf }, - { 0x00f8u, 0x00feu, 0x0000u, 
CanonicalizeLatinMask0x20 }, - { 0x00ffu, 0x00ffu, 0x0000u, CanonicalizeLatinSelf }, - { 0x0100u, 0x0177u, 0x0000u, CanonicalizeLatinInvalid }, - { 0x0178u, 0x0178u, 0x00ffu, CanonicalizeLatinOther }, - { 0x0179u, 0x039bu, 0x0000u, CanonicalizeLatinInvalid }, - { 0x039cu, 0x039cu, 0x00b5u, CanonicalizeLatinOther }, - { 0x039du, 0x03bbu, 0x0000u, CanonicalizeLatinInvalid }, - { 0x03bcu, 0x03bcu, 0x00b5u, CanonicalizeLatinOther }, - { 0x03bdu, 0xffffu, 0x0000u, CanonicalizeLatinInvalid }, +const size_t UCS2_CANONICALIZATION_RANGES = 391; +const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { + { 0x0000, 0x0040, 0x0000, CanonicalizeUnique }, + { 0x0041, 0x005a, 0x0020, CanonicalizeRangeLo }, + { 0x005b, 0x0060, 0x0000, CanonicalizeUnique }, + { 0x0061, 0x007a, 0x0020, CanonicalizeRangeHi }, + { 0x007b, 0x00b4, 0x0000, CanonicalizeUnique }, + { 0x00b5, 0x00b5, 0x0009, CanonicalizeSet }, + { 0x00b6, 0x00bf, 0x0000, CanonicalizeUnique }, + { 0x00c0, 0x00d6, 0x0020, CanonicalizeRangeLo }, + { 0x00d7, 0x00d7, 0x0000, CanonicalizeUnique }, + { 0x00d8, 0x00de, 0x0020, CanonicalizeRangeLo }, + { 0x00df, 0x00df, 0x0000, CanonicalizeUnique }, + { 0x00e0, 0x00f6, 0x0020, CanonicalizeRangeHi }, + { 0x00f7, 0x00f7, 0x0000, CanonicalizeUnique }, + { 0x00f8, 0x00fe, 0x0020, CanonicalizeRangeHi }, + { 0x00ff, 0x00ff, 0x0079, CanonicalizeRangeLo }, + { 0x0100, 0x012f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0130, 0x0131, 0x0000, CanonicalizeUnique }, + { 0x0132, 0x0137, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0138, 0x0138, 0x0000, CanonicalizeUnique }, + { 0x0139, 0x0148, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0149, 0x0149, 0x0000, CanonicalizeUnique }, + { 0x014a, 0x0177, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0178, 0x0178, 0x0079, CanonicalizeRangeHi }, + { 0x0179, 0x017e, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x017f, 0x017f, 0x0000, CanonicalizeUnique }, + { 0x0180, 0x0180, 0x00c3, CanonicalizeRangeLo }, + { 
0x0181, 0x0181, 0x00d2, CanonicalizeRangeLo }, + { 0x0182, 0x0185, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0186, 0x0186, 0x00ce, CanonicalizeRangeLo }, + { 0x0187, 0x0188, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0189, 0x018a, 0x00cd, CanonicalizeRangeLo }, + { 0x018b, 0x018c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x018d, 0x018d, 0x0000, CanonicalizeUnique }, + { 0x018e, 0x018e, 0x004f, CanonicalizeRangeLo }, + { 0x018f, 0x018f, 0x00ca, CanonicalizeRangeLo }, + { 0x0190, 0x0190, 0x00cb, CanonicalizeRangeLo }, + { 0x0191, 0x0192, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0193, 0x0193, 0x00cd, CanonicalizeRangeLo }, + { 0x0194, 0x0194, 0x00cf, CanonicalizeRangeLo }, + { 0x0195, 0x0195, 0x0061, CanonicalizeRangeLo }, + { 0x0196, 0x0196, 0x00d3, CanonicalizeRangeLo }, + { 0x0197, 0x0197, 0x00d1, CanonicalizeRangeLo }, + { 0x0198, 0x0199, 0x0000, CanonicalizeAlternatingAligned }, + { 0x019a, 0x019a, 0x00a3, CanonicalizeRangeLo }, + { 0x019b, 0x019b, 0x0000, CanonicalizeUnique }, + { 0x019c, 0x019c, 0x00d3, CanonicalizeRangeLo }, + { 0x019d, 0x019d, 0x00d5, CanonicalizeRangeLo }, + { 0x019e, 0x019e, 0x0082, CanonicalizeRangeLo }, + { 0x019f, 0x019f, 0x00d6, CanonicalizeRangeLo }, + { 0x01a0, 0x01a5, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01a6, 0x01a6, 0x00da, CanonicalizeRangeLo }, + { 0x01a7, 0x01a8, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01a9, 0x01a9, 0x00da, CanonicalizeRangeLo }, + { 0x01aa, 0x01ab, 0x0000, CanonicalizeUnique }, + { 0x01ac, 0x01ad, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01ae, 0x01ae, 0x00da, CanonicalizeRangeLo }, + { 0x01af, 0x01b0, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01b1, 0x01b2, 0x00d9, CanonicalizeRangeLo }, + { 0x01b3, 0x01b6, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01b7, 0x01b7, 0x00db, CanonicalizeRangeLo }, + { 0x01b8, 0x01b9, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01ba, 0x01bb, 0x0000, CanonicalizeUnique }, + { 0x01bc, 0x01bd, 0x0000, 
CanonicalizeAlternatingAligned }, + { 0x01be, 0x01be, 0x0000, CanonicalizeUnique }, + { 0x01bf, 0x01bf, 0x0038, CanonicalizeRangeLo }, + { 0x01c0, 0x01c3, 0x0000, CanonicalizeUnique }, + { 0x01c4, 0x01c6, 0x0000, CanonicalizeSet }, + { 0x01c7, 0x01c9, 0x0001, CanonicalizeSet }, + { 0x01ca, 0x01cc, 0x0002, CanonicalizeSet }, + { 0x01cd, 0x01dc, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01dd, 0x01dd, 0x004f, CanonicalizeRangeHi }, + { 0x01de, 0x01ef, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01f0, 0x01f0, 0x0000, CanonicalizeUnique }, + { 0x01f1, 0x01f3, 0x0003, CanonicalizeSet }, + { 0x01f4, 0x01f5, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01f6, 0x01f6, 0x0061, CanonicalizeRangeHi }, + { 0x01f7, 0x01f7, 0x0038, CanonicalizeRangeHi }, + { 0x01f8, 0x021f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0220, 0x0220, 0x0082, CanonicalizeRangeHi }, + { 0x0221, 0x0221, 0x0000, CanonicalizeUnique }, + { 0x0222, 0x0233, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0234, 0x0239, 0x0000, CanonicalizeUnique }, + { 0x023a, 0x023a, 0x2a2b, CanonicalizeRangeLo }, + { 0x023b, 0x023c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x023d, 0x023d, 0x00a3, CanonicalizeRangeHi }, + { 0x023e, 0x023e, 0x2a28, CanonicalizeRangeLo }, + { 0x023f, 0x0240, 0x2a3f, CanonicalizeRangeLo }, + { 0x0241, 0x0242, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0243, 0x0243, 0x00c3, CanonicalizeRangeHi }, + { 0x0244, 0x0244, 0x0045, CanonicalizeRangeLo }, + { 0x0245, 0x0245, 0x0047, CanonicalizeRangeLo }, + { 0x0246, 0x024f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0250, 0x0250, 0x2a1f, CanonicalizeRangeLo }, + { 0x0251, 0x0251, 0x2a1c, CanonicalizeRangeLo }, + { 0x0252, 0x0252, 0x2a1e, CanonicalizeRangeLo }, + { 0x0253, 0x0253, 0x00d2, CanonicalizeRangeHi }, + { 0x0254, 0x0254, 0x00ce, CanonicalizeRangeHi }, + { 0x0255, 0x0255, 0x0000, CanonicalizeUnique }, + { 0x0256, 0x0257, 0x00cd, CanonicalizeRangeHi }, + { 0x0258, 0x0258, 0x0000, CanonicalizeUnique }, + { 
0x0259, 0x0259, 0x00ca, CanonicalizeRangeHi }, + { 0x025a, 0x025a, 0x0000, CanonicalizeUnique }, + { 0x025b, 0x025b, 0x00cb, CanonicalizeRangeHi }, + { 0x025c, 0x025c, 0xa54f, CanonicalizeRangeLo }, + { 0x025d, 0x025f, 0x0000, CanonicalizeUnique }, + { 0x0260, 0x0260, 0x00cd, CanonicalizeRangeHi }, + { 0x0261, 0x0261, 0xa54b, CanonicalizeRangeLo }, + { 0x0262, 0x0262, 0x0000, CanonicalizeUnique }, + { 0x0263, 0x0263, 0x00cf, CanonicalizeRangeHi }, + { 0x0264, 0x0264, 0x0000, CanonicalizeUnique }, + { 0x0265, 0x0265, 0xa528, CanonicalizeRangeLo }, + { 0x0266, 0x0266, 0xa544, CanonicalizeRangeLo }, + { 0x0267, 0x0267, 0x0000, CanonicalizeUnique }, + { 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi }, + { 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi }, + { 0x026a, 0x026a, 0x0000, CanonicalizeUnique }, + { 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo }, + { 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo }, + { 0x026d, 0x026e, 0x0000, CanonicalizeUnique }, + { 0x026f, 0x026f, 0x00d3, CanonicalizeRangeHi }, + { 0x0270, 0x0270, 0x0000, CanonicalizeUnique }, + { 0x0271, 0x0271, 0x29fd, CanonicalizeRangeLo }, + { 0x0272, 0x0272, 0x00d5, CanonicalizeRangeHi }, + { 0x0273, 0x0274, 0x0000, CanonicalizeUnique }, + { 0x0275, 0x0275, 0x00d6, CanonicalizeRangeHi }, + { 0x0276, 0x027c, 0x0000, CanonicalizeUnique }, + { 0x027d, 0x027d, 0x29e7, CanonicalizeRangeLo }, + { 0x027e, 0x027f, 0x0000, CanonicalizeUnique }, + { 0x0280, 0x0280, 0x00da, CanonicalizeRangeHi }, + { 0x0281, 0x0282, 0x0000, CanonicalizeUnique }, + { 0x0283, 0x0283, 0x00da, CanonicalizeRangeHi }, + { 0x0284, 0x0286, 0x0000, CanonicalizeUnique }, + { 0x0287, 0x0287, 0xa52a, CanonicalizeRangeLo }, + { 0x0288, 0x0288, 0x00da, CanonicalizeRangeHi }, + { 0x0289, 0x0289, 0x0045, CanonicalizeRangeHi }, + { 0x028a, 0x028b, 0x00d9, CanonicalizeRangeHi }, + { 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi }, + { 0x028d, 0x0291, 0x0000, CanonicalizeUnique }, + { 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi }, + { 0x0293, 0x029d, 0x0000, 
CanonicalizeUnique }, + { 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo }, + { 0x029f, 0x0344, 0x0000, CanonicalizeUnique }, + { 0x0345, 0x0345, 0x0007, CanonicalizeSet }, + { 0x0346, 0x036f, 0x0000, CanonicalizeUnique }, + { 0x0370, 0x0373, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0374, 0x0375, 0x0000, CanonicalizeUnique }, + { 0x0376, 0x0377, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0378, 0x037a, 0x0000, CanonicalizeUnique }, + { 0x037b, 0x037d, 0x0082, CanonicalizeRangeLo }, + { 0x037e, 0x037e, 0x0000, CanonicalizeUnique }, + { 0x037f, 0x037f, 0x0074, CanonicalizeRangeLo }, + { 0x0380, 0x0385, 0x0000, CanonicalizeUnique }, + { 0x0386, 0x0386, 0x0026, CanonicalizeRangeLo }, + { 0x0387, 0x0387, 0x0000, CanonicalizeUnique }, + { 0x0388, 0x038a, 0x0025, CanonicalizeRangeLo }, + { 0x038b, 0x038b, 0x0000, CanonicalizeUnique }, + { 0x038c, 0x038c, 0x0040, CanonicalizeRangeLo }, + { 0x038d, 0x038d, 0x0000, CanonicalizeUnique }, + { 0x038e, 0x038f, 0x003f, CanonicalizeRangeLo }, + { 0x0390, 0x0390, 0x0000, CanonicalizeUnique }, + { 0x0391, 0x0391, 0x0020, CanonicalizeRangeLo }, + { 0x0392, 0x0392, 0x0004, CanonicalizeSet }, + { 0x0393, 0x0394, 0x0020, CanonicalizeRangeLo }, + { 0x0395, 0x0395, 0x0005, CanonicalizeSet }, + { 0x0396, 0x0397, 0x0020, CanonicalizeRangeLo }, + { 0x0398, 0x0398, 0x0006, CanonicalizeSet }, + { 0x0399, 0x0399, 0x0007, CanonicalizeSet }, + { 0x039a, 0x039a, 0x0008, CanonicalizeSet }, + { 0x039b, 0x039b, 0x0020, CanonicalizeRangeLo }, + { 0x039c, 0x039c, 0x0009, CanonicalizeSet }, + { 0x039d, 0x039f, 0x0020, CanonicalizeRangeLo }, + { 0x03a0, 0x03a0, 0x000a, CanonicalizeSet }, + { 0x03a1, 0x03a1, 0x000b, CanonicalizeSet }, + { 0x03a2, 0x03a2, 0x0000, CanonicalizeUnique }, + { 0x03a3, 0x03a3, 0x000c, CanonicalizeSet }, + { 0x03a4, 0x03a5, 0x0020, CanonicalizeRangeLo }, + { 0x03a6, 0x03a6, 0x000d, CanonicalizeSet }, + { 0x03a7, 0x03ab, 0x0020, CanonicalizeRangeLo }, + { 0x03ac, 0x03ac, 0x0026, CanonicalizeRangeHi }, + { 0x03ad, 0x03af, 
0x0025, CanonicalizeRangeHi }, + { 0x03b0, 0x03b0, 0x0000, CanonicalizeUnique }, + { 0x03b1, 0x03b1, 0x0020, CanonicalizeRangeHi }, + { 0x03b2, 0x03b2, 0x0004, CanonicalizeSet }, + { 0x03b3, 0x03b4, 0x0020, CanonicalizeRangeHi }, + { 0x03b5, 0x03b5, 0x0005, CanonicalizeSet }, + { 0x03b6, 0x03b7, 0x0020, CanonicalizeRangeHi }, + { 0x03b8, 0x03b8, 0x0006, CanonicalizeSet }, + { 0x03b9, 0x03b9, 0x0007, CanonicalizeSet }, + { 0x03ba, 0x03ba, 0x0008, CanonicalizeSet }, + { 0x03bb, 0x03bb, 0x0020, CanonicalizeRangeHi }, + { 0x03bc, 0x03bc, 0x0009, CanonicalizeSet }, + { 0x03bd, 0x03bf, 0x0020, CanonicalizeRangeHi }, + { 0x03c0, 0x03c0, 0x000a, CanonicalizeSet }, + { 0x03c1, 0x03c1, 0x000b, CanonicalizeSet }, + { 0x03c2, 0x03c3, 0x000c, CanonicalizeSet }, + { 0x03c4, 0x03c5, 0x0020, CanonicalizeRangeHi }, + { 0x03c6, 0x03c6, 0x000d, CanonicalizeSet }, + { 0x03c7, 0x03cb, 0x0020, CanonicalizeRangeHi }, + { 0x03cc, 0x03cc, 0x0040, CanonicalizeRangeHi }, + { 0x03cd, 0x03ce, 0x003f, CanonicalizeRangeHi }, + { 0x03cf, 0x03cf, 0x0008, CanonicalizeRangeLo }, + { 0x03d0, 0x03d0, 0x0004, CanonicalizeSet }, + { 0x03d1, 0x03d1, 0x0006, CanonicalizeSet }, + { 0x03d2, 0x03d4, 0x0000, CanonicalizeUnique }, + { 0x03d5, 0x03d5, 0x000d, CanonicalizeSet }, + { 0x03d6, 0x03d6, 0x000a, CanonicalizeSet }, + { 0x03d7, 0x03d7, 0x0008, CanonicalizeRangeHi }, + { 0x03d8, 0x03ef, 0x0000, CanonicalizeAlternatingAligned }, + { 0x03f0, 0x03f0, 0x0008, CanonicalizeSet }, + { 0x03f1, 0x03f1, 0x000b, CanonicalizeSet }, + { 0x03f2, 0x03f2, 0x0007, CanonicalizeRangeLo }, + { 0x03f3, 0x03f3, 0x0074, CanonicalizeRangeHi }, + { 0x03f4, 0x03f4, 0x0000, CanonicalizeUnique }, + { 0x03f5, 0x03f5, 0x0005, CanonicalizeSet }, + { 0x03f6, 0x03f6, 0x0000, CanonicalizeUnique }, + { 0x03f7, 0x03f8, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x03f9, 0x03f9, 0x0007, CanonicalizeRangeHi }, + { 0x03fa, 0x03fb, 0x0000, CanonicalizeAlternatingAligned }, + { 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique }, + { 0x03fd, 
0x03ff, 0x0082, CanonicalizeRangeHi }, + { 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo }, + { 0x0410, 0x042f, 0x0020, CanonicalizeRangeLo }, + { 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi }, + { 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi }, + { 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0482, 0x0489, 0x0000, CanonicalizeUnique }, + { 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned }, + { 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo }, + { 0x04c1, 0x04ce, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x04cf, 0x04cf, 0x000f, CanonicalizeRangeHi }, + { 0x04d0, 0x052f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0530, 0x0530, 0x0000, CanonicalizeUnique }, + { 0x0531, 0x0556, 0x0030, CanonicalizeRangeLo }, + { 0x0557, 0x0560, 0x0000, CanonicalizeUnique }, + { 0x0561, 0x0586, 0x0030, CanonicalizeRangeHi }, + { 0x0587, 0x109f, 0x0000, CanonicalizeUnique }, + { 0x10a0, 0x10c5, 0x1c60, CanonicalizeRangeLo }, + { 0x10c6, 0x10c6, 0x0000, CanonicalizeUnique }, + { 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo }, + { 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique }, + { 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo }, + { 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique }, + { 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo }, + { 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique }, + { 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo }, + { 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique }, + { 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1e60, 0x1e61, 0x000e, CanonicalizeSet }, + { 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique }, + { 0x1e9b, 0x1e9b, 0x000e, CanonicalizeSet }, + { 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique }, + { 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo }, + { 0x1f08, 0x1f0f, 0x0008, CanonicalizeRangeHi }, + { 0x1f10, 0x1f15, 0x0008, CanonicalizeRangeLo }, + { 0x1f16, 0x1f17, 0x0000, CanonicalizeUnique }, + { 0x1f18, 
0x1f1d, 0x0008, CanonicalizeRangeHi }, + { 0x1f1e, 0x1f1f, 0x0000, CanonicalizeUnique }, + { 0x1f20, 0x1f27, 0x0008, CanonicalizeRangeLo }, + { 0x1f28, 0x1f2f, 0x0008, CanonicalizeRangeHi }, + { 0x1f30, 0x1f37, 0x0008, CanonicalizeRangeLo }, + { 0x1f38, 0x1f3f, 0x0008, CanonicalizeRangeHi }, + { 0x1f40, 0x1f45, 0x0008, CanonicalizeRangeLo }, + { 0x1f46, 0x1f47, 0x0000, CanonicalizeUnique }, + { 0x1f48, 0x1f4d, 0x0008, CanonicalizeRangeHi }, + { 0x1f4e, 0x1f50, 0x0000, CanonicalizeUnique }, + { 0x1f51, 0x1f51, 0x0008, CanonicalizeRangeLo }, + { 0x1f52, 0x1f52, 0x0000, CanonicalizeUnique }, + { 0x1f53, 0x1f53, 0x0008, CanonicalizeRangeLo }, + { 0x1f54, 0x1f54, 0x0000, CanonicalizeUnique }, + { 0x1f55, 0x1f55, 0x0008, CanonicalizeRangeLo }, + { 0x1f56, 0x1f56, 0x0000, CanonicalizeUnique }, + { 0x1f57, 0x1f57, 0x0008, CanonicalizeRangeLo }, + { 0x1f58, 0x1f58, 0x0000, CanonicalizeUnique }, + { 0x1f59, 0x1f59, 0x0008, CanonicalizeRangeHi }, + { 0x1f5a, 0x1f5a, 0x0000, CanonicalizeUnique }, + { 0x1f5b, 0x1f5b, 0x0008, CanonicalizeRangeHi }, + { 0x1f5c, 0x1f5c, 0x0000, CanonicalizeUnique }, + { 0x1f5d, 0x1f5d, 0x0008, CanonicalizeRangeHi }, + { 0x1f5e, 0x1f5e, 0x0000, CanonicalizeUnique }, + { 0x1f5f, 0x1f5f, 0x0008, CanonicalizeRangeHi }, + { 0x1f60, 0x1f67, 0x0008, CanonicalizeRangeLo }, + { 0x1f68, 0x1f6f, 0x0008, CanonicalizeRangeHi }, + { 0x1f70, 0x1f71, 0x004a, CanonicalizeRangeLo }, + { 0x1f72, 0x1f75, 0x0056, CanonicalizeRangeLo }, + { 0x1f76, 0x1f77, 0x0064, CanonicalizeRangeLo }, + { 0x1f78, 0x1f79, 0x0080, CanonicalizeRangeLo }, + { 0x1f7a, 0x1f7b, 0x0070, CanonicalizeRangeLo }, + { 0x1f7c, 0x1f7d, 0x007e, CanonicalizeRangeLo }, + { 0x1f7e, 0x1faf, 0x0000, CanonicalizeUnique }, + { 0x1fb0, 0x1fb1, 0x0008, CanonicalizeRangeLo }, + { 0x1fb2, 0x1fb7, 0x0000, CanonicalizeUnique }, + { 0x1fb8, 0x1fb9, 0x0008, CanonicalizeRangeHi }, + { 0x1fba, 0x1fbb, 0x004a, CanonicalizeRangeHi }, + { 0x1fbc, 0x1fbd, 0x0000, CanonicalizeUnique }, + { 0x1fbe, 0x1fbe, 0x0007, 
CanonicalizeSet }, + { 0x1fbf, 0x1fc7, 0x0000, CanonicalizeUnique }, + { 0x1fc8, 0x1fcb, 0x0056, CanonicalizeRangeHi }, + { 0x1fcc, 0x1fcf, 0x0000, CanonicalizeUnique }, + { 0x1fd0, 0x1fd1, 0x0008, CanonicalizeRangeLo }, + { 0x1fd2, 0x1fd7, 0x0000, CanonicalizeUnique }, + { 0x1fd8, 0x1fd9, 0x0008, CanonicalizeRangeHi }, + { 0x1fda, 0x1fdb, 0x0064, CanonicalizeRangeHi }, + { 0x1fdc, 0x1fdf, 0x0000, CanonicalizeUnique }, + { 0x1fe0, 0x1fe1, 0x0008, CanonicalizeRangeLo }, + { 0x1fe2, 0x1fe4, 0x0000, CanonicalizeUnique }, + { 0x1fe5, 0x1fe5, 0x0007, CanonicalizeRangeLo }, + { 0x1fe6, 0x1fe7, 0x0000, CanonicalizeUnique }, + { 0x1fe8, 0x1fe9, 0x0008, CanonicalizeRangeHi }, + { 0x1fea, 0x1feb, 0x0070, CanonicalizeRangeHi }, + { 0x1fec, 0x1fec, 0x0007, CanonicalizeRangeHi }, + { 0x1fed, 0x1ff7, 0x0000, CanonicalizeUnique }, + { 0x1ff8, 0x1ff9, 0x0080, CanonicalizeRangeHi }, + { 0x1ffa, 0x1ffb, 0x007e, CanonicalizeRangeHi }, + { 0x1ffc, 0x2131, 0x0000, CanonicalizeUnique }, + { 0x2132, 0x2132, 0x001c, CanonicalizeRangeLo }, + { 0x2133, 0x214d, 0x0000, CanonicalizeUnique }, + { 0x214e, 0x214e, 0x001c, CanonicalizeRangeHi }, + { 0x214f, 0x215f, 0x0000, CanonicalizeUnique }, + { 0x2160, 0x216f, 0x0010, CanonicalizeRangeLo }, + { 0x2170, 0x217f, 0x0010, CanonicalizeRangeHi }, + { 0x2180, 0x2182, 0x0000, CanonicalizeUnique }, + { 0x2183, 0x2184, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2185, 0x24b5, 0x0000, CanonicalizeUnique }, + { 0x24b6, 0x24cf, 0x001a, CanonicalizeRangeLo }, + { 0x24d0, 0x24e9, 0x001a, CanonicalizeRangeHi }, + { 0x24ea, 0x2bff, 0x0000, CanonicalizeUnique }, + { 0x2c00, 0x2c2e, 0x0030, CanonicalizeRangeLo }, + { 0x2c2f, 0x2c2f, 0x0000, CanonicalizeUnique }, + { 0x2c30, 0x2c5e, 0x0030, CanonicalizeRangeHi }, + { 0x2c5f, 0x2c5f, 0x0000, CanonicalizeUnique }, + { 0x2c60, 0x2c61, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2c62, 0x2c62, 0x29f7, CanonicalizeRangeHi }, + { 0x2c63, 0x2c63, 0x0ee6, CanonicalizeRangeHi }, + { 0x2c64, 0x2c64, 0x29e7, 
CanonicalizeRangeHi }, + { 0x2c65, 0x2c65, 0x2a2b, CanonicalizeRangeHi }, + { 0x2c66, 0x2c66, 0x2a28, CanonicalizeRangeHi }, + { 0x2c67, 0x2c6c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2c6d, 0x2c6d, 0x2a1c, CanonicalizeRangeHi }, + { 0x2c6e, 0x2c6e, 0x29fd, CanonicalizeRangeHi }, + { 0x2c6f, 0x2c6f, 0x2a1f, CanonicalizeRangeHi }, + { 0x2c70, 0x2c70, 0x2a1e, CanonicalizeRangeHi }, + { 0x2c71, 0x2c71, 0x0000, CanonicalizeUnique }, + { 0x2c72, 0x2c73, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2c74, 0x2c74, 0x0000, CanonicalizeUnique }, + { 0x2c75, 0x2c76, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2c77, 0x2c7d, 0x0000, CanonicalizeUnique }, + { 0x2c7e, 0x2c7f, 0x2a3f, CanonicalizeRangeHi }, + { 0x2c80, 0x2ce3, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2ce4, 0x2cea, 0x0000, CanonicalizeUnique }, + { 0x2ceb, 0x2cee, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2cef, 0x2cf1, 0x0000, CanonicalizeUnique }, + { 0x2cf2, 0x2cf3, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2cf4, 0x2cff, 0x0000, CanonicalizeUnique }, + { 0x2d00, 0x2d25, 0x1c60, CanonicalizeRangeHi }, + { 0x2d26, 0x2d26, 0x0000, CanonicalizeUnique }, + { 0x2d27, 0x2d27, 0x1c60, CanonicalizeRangeHi }, + { 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique }, + { 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi }, + { 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique }, + { 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique }, + { 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa69c, 0xa721, 0x0000, CanonicalizeUnique }, + { 0xa722, 0xa72f, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa730, 0xa731, 0x0000, CanonicalizeUnique }, + { 0xa732, 0xa76f, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa770, 0xa778, 0x0000, CanonicalizeUnique }, + { 0xa779, 0xa77c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0xa77d, 0xa77d, 0x8a04, CanonicalizeRangeHi }, + { 0xa77e, 0xa787, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa788, 
0xa78a, 0x0000, CanonicalizeUnique }, + { 0xa78b, 0xa78c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0xa78d, 0xa78d, 0xa528, CanonicalizeRangeHi }, + { 0xa78e, 0xa78f, 0x0000, CanonicalizeUnique }, + { 0xa790, 0xa793, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa794, 0xa795, 0x0000, CanonicalizeUnique }, + { 0xa796, 0xa7a9, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa7aa, 0xa7aa, 0xa544, CanonicalizeRangeHi }, + { 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi }, + { 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi }, + { 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi }, + { 0xa7ae, 0xa7af, 0x0000, CanonicalizeUnique }, + { 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi }, + { 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi }, + { 0xa7b2, 0xff20, 0x0000, CanonicalizeUnique }, + { 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo }, + { 0xff3b, 0xff40, 0x0000, CanonicalizeUnique }, + { 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi }, + { 0xff5b, 0xffff, 0x0000, CanonicalizeUnique }, }; } } // JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js index 00361dd46e..dc578cfece 100644 --- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js +++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 Apple Inc. All rights reserved. + * Copyright (C) 2012, 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,7 +23,61 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// See ES 5.1, 15.10.2.8 +function printHeader() +{ + var copyright = ( + "/*" + "\n" + + " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." 
+ "\n" + + " *" + "\n" + + " * Redistribution and use in source and binary forms, with or without" + "\n" + + " * modification, are permitted provided that the following conditions" + "\n" + + " * are met:" + "\n" + + " * 1. Redistributions of source code must retain the above copyright" + "\n" + + " * notice, this list of conditions and the following disclaimer." + "\n" + + " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" + + " * notice, this list of conditions and the following disclaimer in the" + "\n" + + " * documentation and/or other materials provided with the distribution." + "\n" + + " *" + "\n" + + " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + + " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + + " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + + " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" + + " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + + " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + + " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + + " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + + " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + + " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + + " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" + + " */"); + + print(copyright); + print(); + print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js"); + print(); + print('#include "config.h"'); + print('#include "YarrCanonicalize.h"'); + print(); + print("namespace JSC { namespace Yarr {"); + print(); +} + +function printFooter() +{ + print("} } // JSC::Yarr"); + print(); +} + +// Helper function to convert a number to a fixed width hex representation of a UChar32. 
+function hex(x) +{ + var s = Number(x).toString(16); + while (s.length < 4) + s = 0 + s; + return "0x" + s; +} + +// See ES 6.0, 21.2.2.8.2 Steps 3 function canonicalize(ch) { var u = String.fromCharCode(ch).toUpperCase(); @@ -36,184 +90,104 @@ function canonicalize(ch) } var MAX_UCS2 = 0xFFFF; -var MAX_LATIN = 0xFF; - -var groupedCanonically = []; -// Pass 1: populate groupedCanonically - this is mapping from canonicalized -// values back to the set of character code that canonicalize to them. -for (var i = 0; i <= MAX_UCS2; ++i) { - var ch = canonicalize(i); - if (!groupedCanonically[ch]) - groupedCanonically[ch] = []; - groupedCanonically[ch].push(i); -} -var typeInfo = []; -var latinTypeInfo = []; -var characterSetInfo = []; -// Pass 2: populate typeInfo & characterSetInfo. For every character calculate -// a typeInfo value, described by the types above, and a value payload. -for (cu in groupedCanonically) { - // The set of characters that canonicalize to cu - var characters = groupedCanonically[cu]; - - // If there is only one, it is unique. - if (characters.length == 1) { - typeInfo[characters[0]] = "CanonicalizeUnique:0"; - latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? "CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0"; - continue; +function createUCS2CanonicalGroups() +{ + var groupedCanonically = []; + // Pass 1: populate groupedCanonically - this is mapping from canonicalized + // values back to the set of character code that canonicalize to them. + for (var i = 0; i <= MAX_UCS2; ++i) { + var ch = canonicalize(i); + if (!groupedCanonically[ch]) + groupedCanonically[ch] = []; + groupedCanonically[ch].push(i); } - // Sort the array. - characters.sort(function(x,y){return x-y;}); + return groupedCanonically; +} - // If there are more than two characters, create an entry in characterSetInfo. 
- if (characters.length > 2) { - for (i in characters) - typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; - characterSetInfo.push(characters); +function createTables(prefix, maxValue, canonicalGroups) +{ + var prefixLower = prefix.toLowerCase(); + var prefixUpper = prefix.toUpperCase(); + var typeInfo = []; + var characterSetInfo = []; + // Pass 2: populate typeInfo & characterSetInfo. For every character calculate + // a typeInfo value, described by the types above, and a value payload. + for (cu in canonicalGroups) { + // The set of characters that canonicalize to cu + var characters = canonicalGroups[cu]; + + // If there is only one, it is unique. + if (characters.length == 1) { + typeInfo[characters[0]] = "CanonicalizeUnique:0"; + continue; + } - if (characters[1] <= MAX_LATIN) - throw new Error("sets with more than one latin character not supported!"); - if (characters[0] <= MAX_LATIN) { - for (i in characters) - latinTypeInfo[characters[i]] = "CanonicalizeLatinOther:" + characters[0]; - latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0"; - } else { + // Sort the array. + characters.sort(function(x,y){return x-y;}); + + // If there are more than two characters, create an entry in characterSetInfo. + if (characters.length > 2) { for (i in characters) - latinTypeInfo[characters[i]] = "CanonicalizeLatinInvalid:0"; + typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; + characterSetInfo.push(characters); + + continue; } - continue; + // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. + var lo = characters[0]; + var hi = characters[1]; + var delta = hi - lo; + if (delta == 1) { + var type = lo & 1 ? 
"CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; + typeInfo[lo] = type; + typeInfo[hi] = type; + } else { + typeInfo[lo] = "CanonicalizeRangeLo:" + delta; + typeInfo[hi] = "CanonicalizeRangeHi:" + delta; + } } - // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. - var lo = characters[0]; - var hi = characters[1]; - var delta = hi - lo; - if (delta == 1) { - var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; - typeInfo[lo] = type; - typeInfo[hi] = type; - } else { - typeInfo[lo] = "CanonicalizeRangeLo:" + delta; - typeInfo[hi] = "CanonicalizeRangeHi:" + delta; + var rangeInfo = []; + // Pass 3: coallesce types into ranges. + for (var end = 0; end <= maxValue; ++end) { + var begin = end; + var type = typeInfo[end]; + while (end < maxValue && typeInfo[end + 1] == type) + ++end; + rangeInfo.push({begin:begin, end:end, type:type}); } - if (lo > MAX_LATIN) { - latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0"; - latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0"; - } else if (hi > MAX_LATIN) { - latinTypeInfo[lo] = "CanonicalizeLatinSelf:0"; - latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo; - } else { - if (delta != 0x20 || lo & 0x20) - throw new Error("pairs of latin characters that don't mask with 0x20 not supported!"); - latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0"; - latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0"; + for (i in characterSetInfo) { + var characters = "" + var set = characterSetInfo[i]; + for (var j in set) + characters += hex(set[j]) + ", "; + print("const UChar32 " + prefixLower + "CharacterSet" + i + "[] = { " + characters + "0 };"); } + print(); + print("static const size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); + print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {"); + for (i in characterSetInfo) + print(" " + prefixLower + 
"CharacterSet" + i + ","); + print("};"); + print(); + print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); + print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {"); + for (i in rangeInfo) { + var info = rangeInfo[i]; + var typeAndValue = info.type.split(':'); + print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); + } + print("};"); + print(); } -var rangeInfo = []; -// Pass 3: coallesce types into ranges. -for (var end = 0; end <= MAX_UCS2; ++end) { - var begin = end; - var type = typeInfo[end]; - while (end < MAX_UCS2 && typeInfo[end + 1] == type) - ++end; - rangeInfo.push({begin:begin, end:end, type:type}); -} +printHeader(); -var latinRangeInfo = []; -// Pass 4: coallesce latin-1 types into ranges. -for (var end = 0; end <= MAX_UCS2; ++end) { - var begin = end; - var type = latinTypeInfo[end]; - while (end < MAX_UCS2 && latinTypeInfo[end + 1] == type) - ++end; - latinRangeInfo.push({begin:begin, end:end, type:type}); -} +createTables("UCS2", MAX_UCS2, createUCS2CanonicalGroups()); - -// Helper function to convert a number to a fixed width hex representation of a C uint16_t. -function hex(x) -{ - var s = Number(x).toString(16); - while (s.length < 4) - s = 0 + s; - return "0x" + s + "u"; -} - -var copyright = ( - "/*" + "\n" + - " * Copyright (C) 2012 Apple Inc. All rights reserved." + "\n" + - " *" + "\n" + - " * Redistribution and use in source and binary forms, with or without" + "\n" + - " * modification, are permitted provided that the following conditions" + "\n" + - " * are met:" + "\n" + - " * 1. Redistributions of source code must retain the above copyright" + "\n" + - " * notice, this list of conditions and the following disclaimer." + "\n" + - " * 2. 
Redistributions in binary form must reproduce the above copyright" + "\n" + - " * notice, this list of conditions and the following disclaimer in the" + "\n" + - " * documentation and/or other materials provided with the distribution." + "\n" + - " *" + "\n" + - " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + - " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + - " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + - " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" + - " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + - " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + - " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + - " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + - " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + - " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + - " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" + - " */"); - -print(copyright); -print(); -print("// DO NOT EDIT! 
- this file autogenerated by YarrCanonicalizeUCS2.js"); -print(); -print('#include "config.h"'); -print('#include "YarrCanonicalizeUCS2.h"'); -print(); -print("namespace JSC { namespace Yarr {"); -print(); -print("#include <stdint.h>"); -print(); - -for (i in characterSetInfo) { - var characters = "" - var set = characterSetInfo[i]; - for (var j in set) - characters += hex(set[j]) + ", "; - print("uint16_t ucs2CharacterSet" + i + "[] = { " + characters + "0 };"); -} -print(); -print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); -print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {"); -for (i in characterSetInfo) -print(" ucs2CharacterSet" + i + ","); -print("};"); -print(); -print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); -print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {"); -for (i in rangeInfo) { - var info = rangeInfo[i]; - var typeAndValue = info.type.split(':'); - print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); -} -print("};"); -print(); -print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";"); -print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {"); -for (i in latinRangeInfo) { - var info = latinRangeInfo[i]; - var typeAndValue = info.type.split(':'); - print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); -} -print("};"); -print(); -print("} } // JSC::Yarr"); -print(); +printFooter(); diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUnicode.cpp b/src/3rdparty/masm/yarr/YarrCanonicalizeUnicode.cpp new file mode 100644 index 0000000000..37bfc5e060 --- /dev/null +++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUnicode.cpp @@ -0,0 +1,591 @@ +/* +* Copyright (C) 2016 Apple Inc. All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* +* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// DO NO EDIT! 
- This file was generated by generateYarrCanonicalizeUnicode + +#include "config.h" +#include "YarrCanonicalize.h" + +namespace JSC { namespace Yarr { + +const UChar32 unicodeCharacterSet0[] = { 0x004b, 0x006b, 0x212a, 0 }; +const UChar32 unicodeCharacterSet1[] = { 0x0053, 0x0073, 0x017f, 0 }; +const UChar32 unicodeCharacterSet2[] = { 0x00c5, 0x00e5, 0x212b, 0 }; +const UChar32 unicodeCharacterSet3[] = { 0x01c4, 0x01c5, 0x01c6, 0 }; +const UChar32 unicodeCharacterSet4[] = { 0x01c7, 0x01c8, 0x01c9, 0 }; +const UChar32 unicodeCharacterSet5[] = { 0x01ca, 0x01cb, 0x01cc, 0 }; +const UChar32 unicodeCharacterSet6[] = { 0x01f1, 0x01f2, 0x01f3, 0 }; +const UChar32 unicodeCharacterSet7[] = { 0x0392, 0x03b2, 0x03d0, 0 }; +const UChar32 unicodeCharacterSet8[] = { 0x0395, 0x03b5, 0x03f5, 0 }; +const UChar32 unicodeCharacterSet9[] = { 0x0398, 0x03b8, 0x03d1, 0x03f4, 0 }; +const UChar32 unicodeCharacterSet10[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 }; +const UChar32 unicodeCharacterSet11[] = { 0x039a, 0x03ba, 0x03f0, 0 }; +const UChar32 unicodeCharacterSet12[] = { 0x00b5, 0x039c, 0x03bc, 0 }; +const UChar32 unicodeCharacterSet13[] = { 0x03a0, 0x03c0, 0x03d6, 0 }; +const UChar32 unicodeCharacterSet14[] = { 0x03a1, 0x03c1, 0x03f1, 0 }; +const UChar32 unicodeCharacterSet15[] = { 0x03a3, 0x03c2, 0x03c3, 0 }; +const UChar32 unicodeCharacterSet16[] = { 0x03a6, 0x03c6, 0x03d5, 0 }; +const UChar32 unicodeCharacterSet17[] = { 0x03a9, 0x03c9, 0x2126, 0 }; +const UChar32 unicodeCharacterSet18[] = { 0x0412, 0x0432, 0x1c80, 0 }; +const UChar32 unicodeCharacterSet19[] = { 0x0414, 0x0434, 0x1c81, 0 }; +const UChar32 unicodeCharacterSet20[] = { 0x041e, 0x043e, 0x1c82, 0 }; +const UChar32 unicodeCharacterSet21[] = { 0x0421, 0x0441, 0x1c83, 0 }; +const UChar32 unicodeCharacterSet22[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 }; +const UChar32 unicodeCharacterSet23[] = { 0x042a, 0x044a, 0x1c86, 0 }; +const UChar32 unicodeCharacterSet24[] = { 0x0462, 0x0463, 0x1c87, 0 }; +const UChar32 
unicodeCharacterSet25[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; +const UChar32 unicodeCharacterSet26[] = { 0x1c88, 0xa64a, 0xa64b, 0 }; + +static const size_t UNICODE_CANONICALIZATION_SETS = 27; +const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = { + unicodeCharacterSet0, + unicodeCharacterSet1, + unicodeCharacterSet2, + unicodeCharacterSet3, + unicodeCharacterSet4, + unicodeCharacterSet5, + unicodeCharacterSet6, + unicodeCharacterSet7, + unicodeCharacterSet8, + unicodeCharacterSet9, + unicodeCharacterSet10, + unicodeCharacterSet11, + unicodeCharacterSet12, + unicodeCharacterSet13, + unicodeCharacterSet14, + unicodeCharacterSet15, + unicodeCharacterSet16, + unicodeCharacterSet17, + unicodeCharacterSet18, + unicodeCharacterSet19, + unicodeCharacterSet20, + unicodeCharacterSet21, + unicodeCharacterSet22, + unicodeCharacterSet23, + unicodeCharacterSet24, + unicodeCharacterSet25, + unicodeCharacterSet26, +}; + +const size_t UNICODE_CANONICALIZATION_RANGES = 495; +const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = { + { 0x0000, 0x0040, 0x0000, CanonicalizeUnique }, + { 0x0041, 0x004a, 0x0020, CanonicalizeRangeLo }, + { 0x004b, 0x004b, 0x0000, CanonicalizeSet }, + { 0x004c, 0x0052, 0x0020, CanonicalizeRangeLo }, + { 0x0053, 0x0053, 0x0001, CanonicalizeSet }, + { 0x0054, 0x005a, 0x0020, CanonicalizeRangeLo }, + { 0x005b, 0x0060, 0x0000, CanonicalizeUnique }, + { 0x0061, 0x006a, 0x0020, CanonicalizeRangeHi }, + { 0x006b, 0x006b, 0x0000, CanonicalizeSet }, + { 0x006c, 0x0072, 0x0020, CanonicalizeRangeHi }, + { 0x0073, 0x0073, 0x0001, CanonicalizeSet }, + { 0x0074, 0x007a, 0x0020, CanonicalizeRangeHi }, + { 0x007b, 0x00b4, 0x0000, CanonicalizeUnique }, + { 0x00b5, 0x00b5, 0x000c, CanonicalizeSet }, + { 0x00b6, 0x00bf, 0x0000, CanonicalizeUnique }, + { 0x00c0, 0x00c4, 0x0020, CanonicalizeRangeLo }, + { 0x00c5, 0x00c5, 0x0002, CanonicalizeSet }, + { 0x00c6, 0x00d6, 0x0020, CanonicalizeRangeLo }, + { 0x00d7, 0x00d7, 0x0000, 
CanonicalizeUnique }, + { 0x00d8, 0x00de, 0x0020, CanonicalizeRangeLo }, + { 0x00df, 0x00df, 0x1dbf, CanonicalizeRangeLo }, + { 0x00e0, 0x00e4, 0x0020, CanonicalizeRangeHi }, + { 0x00e5, 0x00e5, 0x0002, CanonicalizeSet }, + { 0x00e6, 0x00f6, 0x0020, CanonicalizeRangeHi }, + { 0x00f7, 0x00f7, 0x0000, CanonicalizeUnique }, + { 0x00f8, 0x00fe, 0x0020, CanonicalizeRangeHi }, + { 0x00ff, 0x00ff, 0x0079, CanonicalizeRangeLo }, + { 0x0100, 0x012f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0130, 0x0131, 0x0000, CanonicalizeUnique }, + { 0x0132, 0x0137, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0138, 0x0138, 0x0000, CanonicalizeUnique }, + { 0x0139, 0x0148, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0149, 0x0149, 0x0000, CanonicalizeUnique }, + { 0x014a, 0x0177, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0178, 0x0178, 0x0079, CanonicalizeRangeHi }, + { 0x0179, 0x017e, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x017f, 0x017f, 0x0001, CanonicalizeSet }, + { 0x0180, 0x0180, 0x00c3, CanonicalizeRangeLo }, + { 0x0181, 0x0181, 0x00d2, CanonicalizeRangeLo }, + { 0x0182, 0x0185, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0186, 0x0186, 0x00ce, CanonicalizeRangeLo }, + { 0x0187, 0x0188, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0189, 0x018a, 0x00cd, CanonicalizeRangeLo }, + { 0x018b, 0x018c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x018d, 0x018d, 0x0000, CanonicalizeUnique }, + { 0x018e, 0x018e, 0x004f, CanonicalizeRangeLo }, + { 0x018f, 0x018f, 0x00ca, CanonicalizeRangeLo }, + { 0x0190, 0x0190, 0x00cb, CanonicalizeRangeLo }, + { 0x0191, 0x0192, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0193, 0x0193, 0x00cd, CanonicalizeRangeLo }, + { 0x0194, 0x0194, 0x00cf, CanonicalizeRangeLo }, + { 0x0195, 0x0195, 0x0061, CanonicalizeRangeLo }, + { 0x0196, 0x0196, 0x00d3, CanonicalizeRangeLo }, + { 0x0197, 0x0197, 0x00d1, CanonicalizeRangeLo }, + { 0x0198, 0x0199, 0x0000, CanonicalizeAlternatingAligned }, + { 0x019a, 0x019a, 0x00a3, 
CanonicalizeRangeLo }, + { 0x019b, 0x019b, 0x0000, CanonicalizeUnique }, + { 0x019c, 0x019c, 0x00d3, CanonicalizeRangeLo }, + { 0x019d, 0x019d, 0x00d5, CanonicalizeRangeLo }, + { 0x019e, 0x019e, 0x0082, CanonicalizeRangeLo }, + { 0x019f, 0x019f, 0x00d6, CanonicalizeRangeLo }, + { 0x01a0, 0x01a5, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01a6, 0x01a6, 0x00da, CanonicalizeRangeLo }, + { 0x01a7, 0x01a8, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01a9, 0x01a9, 0x00da, CanonicalizeRangeLo }, + { 0x01aa, 0x01ab, 0x0000, CanonicalizeUnique }, + { 0x01ac, 0x01ad, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01ae, 0x01ae, 0x00da, CanonicalizeRangeLo }, + { 0x01af, 0x01b0, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01b1, 0x01b2, 0x00d9, CanonicalizeRangeLo }, + { 0x01b3, 0x01b6, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01b7, 0x01b7, 0x00db, CanonicalizeRangeLo }, + { 0x01b8, 0x01b9, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01ba, 0x01bb, 0x0000, CanonicalizeUnique }, + { 0x01bc, 0x01bd, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01be, 0x01be, 0x0000, CanonicalizeUnique }, + { 0x01bf, 0x01bf, 0x0038, CanonicalizeRangeLo }, + { 0x01c0, 0x01c3, 0x0000, CanonicalizeUnique }, + { 0x01c4, 0x01c6, 0x0003, CanonicalizeSet }, + { 0x01c7, 0x01c9, 0x0004, CanonicalizeSet }, + { 0x01ca, 0x01cc, 0x0005, CanonicalizeSet }, + { 0x01cd, 0x01dc, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x01dd, 0x01dd, 0x004f, CanonicalizeRangeHi }, + { 0x01de, 0x01ef, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01f0, 0x01f0, 0x0000, CanonicalizeUnique }, + { 0x01f1, 0x01f3, 0x0006, CanonicalizeSet }, + { 0x01f4, 0x01f5, 0x0000, CanonicalizeAlternatingAligned }, + { 0x01f6, 0x01f6, 0x0061, CanonicalizeRangeHi }, + { 0x01f7, 0x01f7, 0x0038, CanonicalizeRangeHi }, + { 0x01f8, 0x021f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0220, 0x0220, 0x0082, CanonicalizeRangeHi }, + { 0x0221, 0x0221, 0x0000, CanonicalizeUnique }, + { 0x0222, 0x0233, 0x0000, 
CanonicalizeAlternatingAligned }, + { 0x0234, 0x0239, 0x0000, CanonicalizeUnique }, + { 0x023a, 0x023a, 0x2a2b, CanonicalizeRangeLo }, + { 0x023b, 0x023c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x023d, 0x023d, 0x00a3, CanonicalizeRangeHi }, + { 0x023e, 0x023e, 0x2a28, CanonicalizeRangeLo }, + { 0x023f, 0x0240, 0x2a3f, CanonicalizeRangeLo }, + { 0x0241, 0x0242, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x0243, 0x0243, 0x00c3, CanonicalizeRangeHi }, + { 0x0244, 0x0244, 0x0045, CanonicalizeRangeLo }, + { 0x0245, 0x0245, 0x0047, CanonicalizeRangeLo }, + { 0x0246, 0x024f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0250, 0x0250, 0x2a1f, CanonicalizeRangeLo }, + { 0x0251, 0x0251, 0x2a1c, CanonicalizeRangeLo }, + { 0x0252, 0x0252, 0x2a1e, CanonicalizeRangeLo }, + { 0x0253, 0x0253, 0x00d2, CanonicalizeRangeHi }, + { 0x0254, 0x0254, 0x00ce, CanonicalizeRangeHi }, + { 0x0255, 0x0255, 0x0000, CanonicalizeUnique }, + { 0x0256, 0x0257, 0x00cd, CanonicalizeRangeHi }, + { 0x0258, 0x0258, 0x0000, CanonicalizeUnique }, + { 0x0259, 0x0259, 0x00ca, CanonicalizeRangeHi }, + { 0x025a, 0x025a, 0x0000, CanonicalizeUnique }, + { 0x025b, 0x025b, 0x00cb, CanonicalizeRangeHi }, + { 0x025c, 0x025c, 0xa54f, CanonicalizeRangeLo }, + { 0x025d, 0x025f, 0x0000, CanonicalizeUnique }, + { 0x0260, 0x0260, 0x00cd, CanonicalizeRangeHi }, + { 0x0261, 0x0261, 0xa54b, CanonicalizeRangeLo }, + { 0x0262, 0x0262, 0x0000, CanonicalizeUnique }, + { 0x0263, 0x0263, 0x00cf, CanonicalizeRangeHi }, + { 0x0264, 0x0264, 0x0000, CanonicalizeUnique }, + { 0x0265, 0x0265, 0xa528, CanonicalizeRangeLo }, + { 0x0266, 0x0266, 0xa544, CanonicalizeRangeLo }, + { 0x0267, 0x0267, 0x0000, CanonicalizeUnique }, + { 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi }, + { 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi }, + { 0x026a, 0x026a, 0xa544, CanonicalizeRangeLo }, + { 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo }, + { 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo }, + { 0x026d, 0x026e, 0x0000, CanonicalizeUnique }, 
+ { 0x026f, 0x026f, 0x00d3, CanonicalizeRangeHi }, + { 0x0270, 0x0270, 0x0000, CanonicalizeUnique }, + { 0x0271, 0x0271, 0x29fd, CanonicalizeRangeLo }, + { 0x0272, 0x0272, 0x00d5, CanonicalizeRangeHi }, + { 0x0273, 0x0274, 0x0000, CanonicalizeUnique }, + { 0x0275, 0x0275, 0x00d6, CanonicalizeRangeHi }, + { 0x0276, 0x027c, 0x0000, CanonicalizeUnique }, + { 0x027d, 0x027d, 0x29e7, CanonicalizeRangeLo }, + { 0x027e, 0x027f, 0x0000, CanonicalizeUnique }, + { 0x0280, 0x0280, 0x00da, CanonicalizeRangeHi }, + { 0x0281, 0x0282, 0x0000, CanonicalizeUnique }, + { 0x0283, 0x0283, 0x00da, CanonicalizeRangeHi }, + { 0x0284, 0x0286, 0x0000, CanonicalizeUnique }, + { 0x0287, 0x0287, 0xa52a, CanonicalizeRangeLo }, + { 0x0288, 0x0288, 0x00da, CanonicalizeRangeHi }, + { 0x0289, 0x0289, 0x0045, CanonicalizeRangeHi }, + { 0x028a, 0x028b, 0x00d9, CanonicalizeRangeHi }, + { 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi }, + { 0x028d, 0x0291, 0x0000, CanonicalizeUnique }, + { 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi }, + { 0x0293, 0x029c, 0x0000, CanonicalizeUnique }, + { 0x029d, 0x029d, 0xa515, CanonicalizeRangeLo }, + { 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo }, + { 0x029f, 0x0344, 0x0000, CanonicalizeUnique }, + { 0x0345, 0x0345, 0x000a, CanonicalizeSet }, + { 0x0346, 0x036f, 0x0000, CanonicalizeUnique }, + { 0x0370, 0x0373, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0374, 0x0375, 0x0000, CanonicalizeUnique }, + { 0x0376, 0x0377, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0378, 0x037a, 0x0000, CanonicalizeUnique }, + { 0x037b, 0x037d, 0x0082, CanonicalizeRangeLo }, + { 0x037e, 0x037e, 0x0000, CanonicalizeUnique }, + { 0x037f, 0x037f, 0x0074, CanonicalizeRangeLo }, + { 0x0380, 0x0385, 0x0000, CanonicalizeUnique }, + { 0x0386, 0x0386, 0x0026, CanonicalizeRangeLo }, + { 0x0387, 0x0387, 0x0000, CanonicalizeUnique }, + { 0x0388, 0x038a, 0x0025, CanonicalizeRangeLo }, + { 0x038b, 0x038b, 0x0000, CanonicalizeUnique }, + { 0x038c, 0x038c, 0x0040, CanonicalizeRangeLo }, + { 
0x038d, 0x038d, 0x0000, CanonicalizeUnique }, + { 0x038e, 0x038f, 0x003f, CanonicalizeRangeLo }, + { 0x0390, 0x0390, 0x0000, CanonicalizeUnique }, + { 0x0391, 0x0391, 0x0020, CanonicalizeRangeLo }, + { 0x0392, 0x0392, 0x0007, CanonicalizeSet }, + { 0x0393, 0x0394, 0x0020, CanonicalizeRangeLo }, + { 0x0395, 0x0395, 0x0008, CanonicalizeSet }, + { 0x0396, 0x0397, 0x0020, CanonicalizeRangeLo }, + { 0x0398, 0x0398, 0x0009, CanonicalizeSet }, + { 0x0399, 0x0399, 0x000a, CanonicalizeSet }, + { 0x039a, 0x039a, 0x000b, CanonicalizeSet }, + { 0x039b, 0x039b, 0x0020, CanonicalizeRangeLo }, + { 0x039c, 0x039c, 0x000c, CanonicalizeSet }, + { 0x039d, 0x039f, 0x0020, CanonicalizeRangeLo }, + { 0x03a0, 0x03a0, 0x000d, CanonicalizeSet }, + { 0x03a1, 0x03a1, 0x000e, CanonicalizeSet }, + { 0x03a2, 0x03a2, 0x0000, CanonicalizeUnique }, + { 0x03a3, 0x03a3, 0x000f, CanonicalizeSet }, + { 0x03a4, 0x03a5, 0x0020, CanonicalizeRangeLo }, + { 0x03a6, 0x03a6, 0x0010, CanonicalizeSet }, + { 0x03a7, 0x03a8, 0x0020, CanonicalizeRangeLo }, + { 0x03a9, 0x03a9, 0x0011, CanonicalizeSet }, + { 0x03aa, 0x03ab, 0x0020, CanonicalizeRangeLo }, + { 0x03ac, 0x03ac, 0x0026, CanonicalizeRangeHi }, + { 0x03ad, 0x03af, 0x0025, CanonicalizeRangeHi }, + { 0x03b0, 0x03b0, 0x0000, CanonicalizeUnique }, + { 0x03b1, 0x03b1, 0x0020, CanonicalizeRangeHi }, + { 0x03b2, 0x03b2, 0x0007, CanonicalizeSet }, + { 0x03b3, 0x03b4, 0x0020, CanonicalizeRangeHi }, + { 0x03b5, 0x03b5, 0x0008, CanonicalizeSet }, + { 0x03b6, 0x03b7, 0x0020, CanonicalizeRangeHi }, + { 0x03b8, 0x03b8, 0x0009, CanonicalizeSet }, + { 0x03b9, 0x03b9, 0x000a, CanonicalizeSet }, + { 0x03ba, 0x03ba, 0x000b, CanonicalizeSet }, + { 0x03bb, 0x03bb, 0x0020, CanonicalizeRangeHi }, + { 0x03bc, 0x03bc, 0x000c, CanonicalizeSet }, + { 0x03bd, 0x03bf, 0x0020, CanonicalizeRangeHi }, + { 0x03c0, 0x03c0, 0x000d, CanonicalizeSet }, + { 0x03c1, 0x03c1, 0x000e, CanonicalizeSet }, + { 0x03c2, 0x03c3, 0x000f, CanonicalizeSet }, + { 0x03c4, 0x03c5, 0x0020, CanonicalizeRangeHi 
}, + { 0x03c6, 0x03c6, 0x0010, CanonicalizeSet }, + { 0x03c7, 0x03c8, 0x0020, CanonicalizeRangeHi }, + { 0x03c9, 0x03c9, 0x0011, CanonicalizeSet }, + { 0x03ca, 0x03cb, 0x0020, CanonicalizeRangeHi }, + { 0x03cc, 0x03cc, 0x0040, CanonicalizeRangeHi }, + { 0x03cd, 0x03ce, 0x003f, CanonicalizeRangeHi }, + { 0x03cf, 0x03cf, 0x0008, CanonicalizeRangeLo }, + { 0x03d0, 0x03d0, 0x0007, CanonicalizeSet }, + { 0x03d1, 0x03d1, 0x0009, CanonicalizeSet }, + { 0x03d2, 0x03d4, 0x0000, CanonicalizeUnique }, + { 0x03d5, 0x03d5, 0x0010, CanonicalizeSet }, + { 0x03d6, 0x03d6, 0x000d, CanonicalizeSet }, + { 0x03d7, 0x03d7, 0x0008, CanonicalizeRangeHi }, + { 0x03d8, 0x03ef, 0x0000, CanonicalizeAlternatingAligned }, + { 0x03f0, 0x03f0, 0x000b, CanonicalizeSet }, + { 0x03f1, 0x03f1, 0x000e, CanonicalizeSet }, + { 0x03f2, 0x03f2, 0x0007, CanonicalizeRangeLo }, + { 0x03f3, 0x03f3, 0x0074, CanonicalizeRangeHi }, + { 0x03f4, 0x03f4, 0x0009, CanonicalizeSet }, + { 0x03f5, 0x03f5, 0x0008, CanonicalizeSet }, + { 0x03f6, 0x03f6, 0x0000, CanonicalizeUnique }, + { 0x03f7, 0x03f8, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x03f9, 0x03f9, 0x0007, CanonicalizeRangeHi }, + { 0x03fa, 0x03fb, 0x0000, CanonicalizeAlternatingAligned }, + { 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique }, + { 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi }, + { 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo }, + { 0x0410, 0x0411, 0x0020, CanonicalizeRangeLo }, + { 0x0412, 0x0412, 0x0012, CanonicalizeSet }, + { 0x0413, 0x0413, 0x0020, CanonicalizeRangeLo }, + { 0x0414, 0x0414, 0x0013, CanonicalizeSet }, + { 0x0415, 0x041d, 0x0020, CanonicalizeRangeLo }, + { 0x041e, 0x041e, 0x0014, CanonicalizeSet }, + { 0x041f, 0x0420, 0x0020, CanonicalizeRangeLo }, + { 0x0421, 0x0421, 0x0015, CanonicalizeSet }, + { 0x0422, 0x0422, 0x0016, CanonicalizeSet }, + { 0x0423, 0x0429, 0x0020, CanonicalizeRangeLo }, + { 0x042a, 0x042a, 0x0017, CanonicalizeSet }, + { 0x042b, 0x042f, 0x0020, CanonicalizeRangeLo }, + { 0x0430, 0x0431, 0x0020, 
CanonicalizeRangeHi }, + { 0x0432, 0x0432, 0x0012, CanonicalizeSet }, + { 0x0433, 0x0433, 0x0020, CanonicalizeRangeHi }, + { 0x0434, 0x0434, 0x0013, CanonicalizeSet }, + { 0x0435, 0x043d, 0x0020, CanonicalizeRangeHi }, + { 0x043e, 0x043e, 0x0014, CanonicalizeSet }, + { 0x043f, 0x0440, 0x0020, CanonicalizeRangeHi }, + { 0x0441, 0x0441, 0x0015, CanonicalizeSet }, + { 0x0442, 0x0442, 0x0016, CanonicalizeSet }, + { 0x0443, 0x0449, 0x0020, CanonicalizeRangeHi }, + { 0x044a, 0x044a, 0x0017, CanonicalizeSet }, + { 0x044b, 0x044f, 0x0020, CanonicalizeRangeHi }, + { 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi }, + { 0x0460, 0x0461, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0462, 0x0463, 0x0018, CanonicalizeSet }, + { 0x0464, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0482, 0x0489, 0x0000, CanonicalizeUnique }, + { 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned }, + { 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo }, + { 0x04c1, 0x04ce, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x04cf, 0x04cf, 0x000f, CanonicalizeRangeHi }, + { 0x04d0, 0x052f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0530, 0x0530, 0x0000, CanonicalizeUnique }, + { 0x0531, 0x0556, 0x0030, CanonicalizeRangeLo }, + { 0x0557, 0x0560, 0x0000, CanonicalizeUnique }, + { 0x0561, 0x0586, 0x0030, CanonicalizeRangeHi }, + { 0x0587, 0x109f, 0x0000, CanonicalizeUnique }, + { 0x10a0, 0x10c5, 0x1c60, CanonicalizeRangeLo }, + { 0x10c6, 0x10c6, 0x0000, CanonicalizeUnique }, + { 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo }, + { 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique }, + { 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo }, + { 0x10ce, 0x139f, 0x0000, CanonicalizeUnique }, + { 0x13a0, 0x13ef, 0x97d0, CanonicalizeRangeLo }, + { 0x13f0, 0x13f5, 0x0008, CanonicalizeRangeLo }, + { 0x13f6, 0x13f7, 0x0000, CanonicalizeUnique }, + { 0x13f8, 0x13fd, 0x0008, CanonicalizeRangeHi }, + { 0x13fe, 0x1c7f, 0x0000, CanonicalizeUnique }, + { 0x1c80, 0x1c80, 0x0012, CanonicalizeSet }, + { 0x1c81, 0x1c81, 
0x0013, CanonicalizeSet }, + { 0x1c82, 0x1c82, 0x0014, CanonicalizeSet }, + { 0x1c83, 0x1c83, 0x0015, CanonicalizeSet }, + { 0x1c84, 0x1c85, 0x0016, CanonicalizeSet }, + { 0x1c86, 0x1c86, 0x0017, CanonicalizeSet }, + { 0x1c87, 0x1c87, 0x0018, CanonicalizeSet }, + { 0x1c88, 0x1c88, 0x001a, CanonicalizeSet }, + { 0x1c89, 0x1d78, 0x0000, CanonicalizeUnique }, + { 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo }, + { 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique }, + { 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo }, + { 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique }, + { 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1e60, 0x1e61, 0x0019, CanonicalizeSet }, + { 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique }, + { 0x1e9b, 0x1e9b, 0x0019, CanonicalizeSet }, + { 0x1e9c, 0x1e9d, 0x0000, CanonicalizeUnique }, + { 0x1e9e, 0x1e9e, 0x1dbf, CanonicalizeRangeHi }, + { 0x1e9f, 0x1e9f, 0x0000, CanonicalizeUnique }, + { 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned }, + { 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo }, + { 0x1f08, 0x1f0f, 0x0008, CanonicalizeRangeHi }, + { 0x1f10, 0x1f15, 0x0008, CanonicalizeRangeLo }, + { 0x1f16, 0x1f17, 0x0000, CanonicalizeUnique }, + { 0x1f18, 0x1f1d, 0x0008, CanonicalizeRangeHi }, + { 0x1f1e, 0x1f1f, 0x0000, CanonicalizeUnique }, + { 0x1f20, 0x1f27, 0x0008, CanonicalizeRangeLo }, + { 0x1f28, 0x1f2f, 0x0008, CanonicalizeRangeHi }, + { 0x1f30, 0x1f37, 0x0008, CanonicalizeRangeLo }, + { 0x1f38, 0x1f3f, 0x0008, CanonicalizeRangeHi }, + { 0x1f40, 0x1f45, 0x0008, CanonicalizeRangeLo }, + { 0x1f46, 0x1f47, 0x0000, CanonicalizeUnique }, + { 0x1f48, 0x1f4d, 0x0008, CanonicalizeRangeHi }, + { 0x1f4e, 0x1f50, 0x0000, CanonicalizeUnique }, + { 0x1f51, 0x1f51, 0x0008, CanonicalizeRangeLo }, + { 0x1f52, 0x1f52, 0x0000, CanonicalizeUnique }, + { 0x1f53, 0x1f53, 0x0008, CanonicalizeRangeLo }, + { 0x1f54, 0x1f54, 0x0000, CanonicalizeUnique }, + { 0x1f55, 0x1f55, 0x0008, 
CanonicalizeRangeLo }, + { 0x1f56, 0x1f56, 0x0000, CanonicalizeUnique }, + { 0x1f57, 0x1f57, 0x0008, CanonicalizeRangeLo }, + { 0x1f58, 0x1f58, 0x0000, CanonicalizeUnique }, + { 0x1f59, 0x1f59, 0x0008, CanonicalizeRangeHi }, + { 0x1f5a, 0x1f5a, 0x0000, CanonicalizeUnique }, + { 0x1f5b, 0x1f5b, 0x0008, CanonicalizeRangeHi }, + { 0x1f5c, 0x1f5c, 0x0000, CanonicalizeUnique }, + { 0x1f5d, 0x1f5d, 0x0008, CanonicalizeRangeHi }, + { 0x1f5e, 0x1f5e, 0x0000, CanonicalizeUnique }, + { 0x1f5f, 0x1f5f, 0x0008, CanonicalizeRangeHi }, + { 0x1f60, 0x1f67, 0x0008, CanonicalizeRangeLo }, + { 0x1f68, 0x1f6f, 0x0008, CanonicalizeRangeHi }, + { 0x1f70, 0x1f71, 0x004a, CanonicalizeRangeLo }, + { 0x1f72, 0x1f75, 0x0056, CanonicalizeRangeLo }, + { 0x1f76, 0x1f77, 0x0064, CanonicalizeRangeLo }, + { 0x1f78, 0x1f79, 0x0080, CanonicalizeRangeLo }, + { 0x1f7a, 0x1f7b, 0x0070, CanonicalizeRangeLo }, + { 0x1f7c, 0x1f7d, 0x007e, CanonicalizeRangeLo }, + { 0x1f7e, 0x1f7f, 0x0000, CanonicalizeUnique }, + { 0x1f80, 0x1f87, 0x0008, CanonicalizeRangeLo }, + { 0x1f88, 0x1f8f, 0x0008, CanonicalizeRangeHi }, + { 0x1f90, 0x1f97, 0x0008, CanonicalizeRangeLo }, + { 0x1f98, 0x1f9f, 0x0008, CanonicalizeRangeHi }, + { 0x1fa0, 0x1fa7, 0x0008, CanonicalizeRangeLo }, + { 0x1fa8, 0x1faf, 0x0008, CanonicalizeRangeHi }, + { 0x1fb0, 0x1fb1, 0x0008, CanonicalizeRangeLo }, + { 0x1fb2, 0x1fb2, 0x0000, CanonicalizeUnique }, + { 0x1fb3, 0x1fb3, 0x0009, CanonicalizeRangeLo }, + { 0x1fb4, 0x1fb7, 0x0000, CanonicalizeUnique }, + { 0x1fb8, 0x1fb9, 0x0008, CanonicalizeRangeHi }, + { 0x1fba, 0x1fbb, 0x004a, CanonicalizeRangeHi }, + { 0x1fbc, 0x1fbc, 0x0009, CanonicalizeRangeHi }, + { 0x1fbd, 0x1fbd, 0x0000, CanonicalizeUnique }, + { 0x1fbe, 0x1fbe, 0x000a, CanonicalizeSet }, + { 0x1fbf, 0x1fc2, 0x0000, CanonicalizeUnique }, + { 0x1fc3, 0x1fc3, 0x0009, CanonicalizeRangeLo }, + { 0x1fc4, 0x1fc7, 0x0000, CanonicalizeUnique }, + { 0x1fc8, 0x1fcb, 0x0056, CanonicalizeRangeHi }, + { 0x1fcc, 0x1fcc, 0x0009, CanonicalizeRangeHi }, + 
{ 0x1fcd, 0x1fcf, 0x0000, CanonicalizeUnique }, + { 0x1fd0, 0x1fd1, 0x0008, CanonicalizeRangeLo }, + { 0x1fd2, 0x1fd7, 0x0000, CanonicalizeUnique }, + { 0x1fd8, 0x1fd9, 0x0008, CanonicalizeRangeHi }, + { 0x1fda, 0x1fdb, 0x0064, CanonicalizeRangeHi }, + { 0x1fdc, 0x1fdf, 0x0000, CanonicalizeUnique }, + { 0x1fe0, 0x1fe1, 0x0008, CanonicalizeRangeLo }, + { 0x1fe2, 0x1fe4, 0x0000, CanonicalizeUnique }, + { 0x1fe5, 0x1fe5, 0x0007, CanonicalizeRangeLo }, + { 0x1fe6, 0x1fe7, 0x0000, CanonicalizeUnique }, + { 0x1fe8, 0x1fe9, 0x0008, CanonicalizeRangeHi }, + { 0x1fea, 0x1feb, 0x0070, CanonicalizeRangeHi }, + { 0x1fec, 0x1fec, 0x0007, CanonicalizeRangeHi }, + { 0x1fed, 0x1ff2, 0x0000, CanonicalizeUnique }, + { 0x1ff3, 0x1ff3, 0x0009, CanonicalizeRangeLo }, + { 0x1ff4, 0x1ff7, 0x0000, CanonicalizeUnique }, + { 0x1ff8, 0x1ff9, 0x0080, CanonicalizeRangeHi }, + { 0x1ffa, 0x1ffb, 0x007e, CanonicalizeRangeHi }, + { 0x1ffc, 0x1ffc, 0x0009, CanonicalizeRangeHi }, + { 0x1ffd, 0x2125, 0x0000, CanonicalizeUnique }, + { 0x2126, 0x2126, 0x0011, CanonicalizeSet }, + { 0x2127, 0x2129, 0x0000, CanonicalizeUnique }, + { 0x212a, 0x212a, 0x0000, CanonicalizeSet }, + { 0x212b, 0x212b, 0x0002, CanonicalizeSet }, + { 0x212c, 0x2131, 0x0000, CanonicalizeUnique }, + { 0x2132, 0x2132, 0x001c, CanonicalizeRangeLo }, + { 0x2133, 0x214d, 0x0000, CanonicalizeUnique }, + { 0x214e, 0x214e, 0x001c, CanonicalizeRangeHi }, + { 0x214f, 0x215f, 0x0000, CanonicalizeUnique }, + { 0x2160, 0x216f, 0x0010, CanonicalizeRangeLo }, + { 0x2170, 0x217f, 0x0010, CanonicalizeRangeHi }, + { 0x2180, 0x2182, 0x0000, CanonicalizeUnique }, + { 0x2183, 0x2184, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2185, 0x24b5, 0x0000, CanonicalizeUnique }, + { 0x24b6, 0x24cf, 0x001a, CanonicalizeRangeLo }, + { 0x24d0, 0x24e9, 0x001a, CanonicalizeRangeHi }, + { 0x24ea, 0x2bff, 0x0000, CanonicalizeUnique }, + { 0x2c00, 0x2c2e, 0x0030, CanonicalizeRangeLo }, + { 0x2c2f, 0x2c2f, 0x0000, CanonicalizeUnique }, + { 0x2c30, 0x2c5e, 0x0030, 
CanonicalizeRangeHi }, + { 0x2c5f, 0x2c5f, 0x0000, CanonicalizeUnique }, + { 0x2c60, 0x2c61, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2c62, 0x2c62, 0x29f7, CanonicalizeRangeHi }, + { 0x2c63, 0x2c63, 0x0ee6, CanonicalizeRangeHi }, + { 0x2c64, 0x2c64, 0x29e7, CanonicalizeRangeHi }, + { 0x2c65, 0x2c65, 0x2a2b, CanonicalizeRangeHi }, + { 0x2c66, 0x2c66, 0x2a28, CanonicalizeRangeHi }, + { 0x2c67, 0x2c6c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2c6d, 0x2c6d, 0x2a1c, CanonicalizeRangeHi }, + { 0x2c6e, 0x2c6e, 0x29fd, CanonicalizeRangeHi }, + { 0x2c6f, 0x2c6f, 0x2a1f, CanonicalizeRangeHi }, + { 0x2c70, 0x2c70, 0x2a1e, CanonicalizeRangeHi }, + { 0x2c71, 0x2c71, 0x0000, CanonicalizeUnique }, + { 0x2c72, 0x2c73, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2c74, 0x2c74, 0x0000, CanonicalizeUnique }, + { 0x2c75, 0x2c76, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2c77, 0x2c7d, 0x0000, CanonicalizeUnique }, + { 0x2c7e, 0x2c7f, 0x2a3f, CanonicalizeRangeHi }, + { 0x2c80, 0x2ce3, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2ce4, 0x2cea, 0x0000, CanonicalizeUnique }, + { 0x2ceb, 0x2cee, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0x2cef, 0x2cf1, 0x0000, CanonicalizeUnique }, + { 0x2cf2, 0x2cf3, 0x0000, CanonicalizeAlternatingAligned }, + { 0x2cf4, 0x2cff, 0x0000, CanonicalizeUnique }, + { 0x2d00, 0x2d25, 0x1c60, CanonicalizeRangeHi }, + { 0x2d26, 0x2d26, 0x0000, CanonicalizeUnique }, + { 0x2d27, 0x2d27, 0x1c60, CanonicalizeRangeHi }, + { 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique }, + { 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi }, + { 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique }, + { 0xa640, 0xa649, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa64a, 0xa64b, 0x001a, CanonicalizeSet }, + { 0xa64c, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique }, + { 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa69c, 0xa721, 0x0000, CanonicalizeUnique }, + { 0xa722, 0xa72f, 0x0000, 
CanonicalizeAlternatingAligned }, + { 0xa730, 0xa731, 0x0000, CanonicalizeUnique }, + { 0xa732, 0xa76f, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa770, 0xa778, 0x0000, CanonicalizeUnique }, + { 0xa779, 0xa77c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0xa77d, 0xa77d, 0x8a04, CanonicalizeRangeHi }, + { 0xa77e, 0xa787, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa788, 0xa78a, 0x0000, CanonicalizeUnique }, + { 0xa78b, 0xa78c, 0x0000, CanonicalizeAlternatingUnaligned }, + { 0xa78d, 0xa78d, 0xa528, CanonicalizeRangeHi }, + { 0xa78e, 0xa78f, 0x0000, CanonicalizeUnique }, + { 0xa790, 0xa793, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa794, 0xa795, 0x0000, CanonicalizeUnique }, + { 0xa796, 0xa7a9, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa7aa, 0xa7aa, 0xa544, CanonicalizeRangeHi }, + { 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi }, + { 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi }, + { 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi }, + { 0xa7ae, 0xa7ae, 0xa544, CanonicalizeRangeHi }, + { 0xa7af, 0xa7af, 0x0000, CanonicalizeUnique }, + { 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi }, + { 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi }, + { 0xa7b2, 0xa7b2, 0xa515, CanonicalizeRangeHi }, + { 0xa7b3, 0xa7b3, 0x03a0, CanonicalizeRangeLo }, + { 0xa7b4, 0xa7b7, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa7b8, 0xab52, 0x0000, CanonicalizeUnique }, + { 0xab53, 0xab53, 0x03a0, CanonicalizeRangeHi }, + { 0xab54, 0xab6f, 0x0000, CanonicalizeUnique }, + { 0xab70, 0xabbf, 0x97d0, CanonicalizeRangeHi }, + { 0xabc0, 0xff20, 0x0000, CanonicalizeUnique }, + { 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo }, + { 0xff3b, 0xff40, 0x0000, CanonicalizeUnique }, + { 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi }, + { 0xff5b, 0x103ff, 0x0000, CanonicalizeUnique }, + { 0x10400, 0x10427, 0x0028, CanonicalizeRangeLo }, + { 0x10428, 0x1044f, 0x0028, CanonicalizeRangeHi }, + { 0x10450, 0x104af, 0x0000, CanonicalizeUnique }, + { 0x104b0, 0x104d3, 0x0028, CanonicalizeRangeLo }, 
+ { 0x104d4, 0x104d7, 0x0000, CanonicalizeUnique }, + { 0x104d8, 0x104fb, 0x0028, CanonicalizeRangeHi }, + { 0x104fc, 0x10c7f, 0x0000, CanonicalizeUnique }, + { 0x10c80, 0x10cb2, 0x0040, CanonicalizeRangeLo }, + { 0x10cb3, 0x10cbf, 0x0000, CanonicalizeUnique }, + { 0x10cc0, 0x10cf2, 0x0040, CanonicalizeRangeHi }, + { 0x10cf3, 0x1189f, 0x0000, CanonicalizeUnique }, + { 0x118a0, 0x118bf, 0x0020, CanonicalizeRangeLo }, + { 0x118c0, 0x118df, 0x0020, CanonicalizeRangeHi }, + { 0x118e0, 0x1e8ff, 0x0000, CanonicalizeUnique }, + { 0x1e900, 0x1e921, 0x0022, CanonicalizeRangeLo }, + { 0x1e922, 0x1e943, 0x0022, CanonicalizeRangeHi }, + { 0x1e944, 0x10ffff, 0x0000, CanonicalizeUnique }, +}; + +} } // JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrErrorCode.cpp b/src/3rdparty/masm/yarr/YarrErrorCode.cpp new file mode 100644 index 0000000000..aaebd4613d --- /dev/null +++ b/src/3rdparty/masm/yarr/YarrErrorCode.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2017 Yusuke Suzuki <utatane.tea@gmail.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "YarrErrorCode.h" + +#include "Error.h" + +namespace JSC { namespace Yarr { + +const char* errorMessage(ErrorCode error) +{ +#define REGEXP_ERROR_PREFIX "Invalid regular expression: " + // The order of this array must match the ErrorCode enum. + static const char* errorMessages[] = { + nullptr, // NoError + REGEXP_ERROR_PREFIX "regular expression too large", // PatternTooLarge + REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", // QuantifierOutOfOrder + REGEXP_ERROR_PREFIX "nothing to repeat", // QuantifierWithoutAtom + REGEXP_ERROR_PREFIX "number too large in {} quantifier", // QuantifierTooLarge + REGEXP_ERROR_PREFIX "missing )", // MissingParentheses + REGEXP_ERROR_PREFIX "unmatched parentheses", // ParenthesesUnmatched + REGEXP_ERROR_PREFIX "unrecognized character after (?", // ParenthesesTypeInvalid + REGEXP_ERROR_PREFIX "invalid group specifier name", // InvalidGroupName + REGEXP_ERROR_PREFIX "duplicate group specifier name", // DuplicateGroupName + REGEXP_ERROR_PREFIX "missing terminating ] for character class", // CharacterClassUnmatched + REGEXP_ERROR_PREFIX "range out of order in character class", // CharacterClassOutOfOrder + REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated + REGEXP_ERROR_PREFIX "invalid unicode {} escape", // InvalidUnicodeEscape + REGEXP_ERROR_PREFIX "invalid backreference for unicode pattern", // InvalidBackreference + REGEXP_ERROR_PREFIX "invalid escaped character for 
unicode pattern", // InvalidIdentityEscape + REGEXP_ERROR_PREFIX "invalid property expression", // InvalidUnicodePropertyExpression + REGEXP_ERROR_PREFIX "too many nested disjunctions", // TooManyDisjunctions + REGEXP_ERROR_PREFIX "pattern exceeds string length limits", // OffsetTooLarge + REGEXP_ERROR_PREFIX "invalid flags" // InvalidRegularExpressionFlags + }; + + return errorMessages[static_cast<unsigned>(error)]; +} + +JSObject* errorToThrow(ExecState* exec, ErrorCode error) +{ + switch (error) { + case ErrorCode::NoError: + ASSERT_NOT_REACHED(); + return nullptr; + case ErrorCode::PatternTooLarge: + case ErrorCode::QuantifierOutOfOrder: + case ErrorCode::QuantifierWithoutAtom: + case ErrorCode::QuantifierTooLarge: + case ErrorCode::MissingParentheses: + case ErrorCode::ParenthesesUnmatched: + case ErrorCode::ParenthesesTypeInvalid: + case ErrorCode::InvalidGroupName: + case ErrorCode::DuplicateGroupName: + case ErrorCode::CharacterClassUnmatched: + case ErrorCode::CharacterClassOutOfOrder: + case ErrorCode::EscapeUnterminated: + case ErrorCode::InvalidUnicodeEscape: + case ErrorCode::InvalidBackreference: + case ErrorCode::InvalidIdentityEscape: + case ErrorCode::InvalidUnicodePropertyExpression: + case ErrorCode::OffsetTooLarge: + case ErrorCode::InvalidRegularExpressionFlags: + return createSyntaxError(exec, errorMessage(error)); + case ErrorCode::TooManyDisjunctions: + return createOutOfMemoryError(exec, errorMessage(error)); + } + + ASSERT_NOT_REACHED(); + return nullptr; +} + +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrErrorCode.h b/src/3rdparty/masm/yarr/YarrErrorCode.h new file mode 100644 index 0000000000..48f2bb7900 --- /dev/null +++ b/src/3rdparty/masm/yarr/YarrErrorCode.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2017 Yusuke Suzuki <utatane.tea@gmail.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +namespace JSC { + +class ExecState; +class JSObject; + +namespace Yarr { + +enum class ErrorCode : uint8_t { + NoError = 0, + PatternTooLarge, + QuantifierOutOfOrder, + QuantifierWithoutAtom, + QuantifierTooLarge, + MissingParentheses, + ParenthesesUnmatched, + ParenthesesTypeInvalid, + InvalidGroupName, + DuplicateGroupName, + CharacterClassUnmatched, + CharacterClassOutOfOrder, + EscapeUnterminated, + InvalidUnicodeEscape, + InvalidBackreference, + InvalidIdentityEscape, + InvalidUnicodePropertyExpression, + TooManyDisjunctions, + OffsetTooLarge, + InvalidRegularExpressionFlags, +}; + +JS_EXPORT_PRIVATE const char* errorMessage(ErrorCode); +inline bool hasError(ErrorCode errorCode) +{ + return errorCode != ErrorCode::NoError; +} +JS_EXPORT_PRIVATE JSObject* errorToThrow(ExecState*, ErrorCode); + +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrInterpreter.cpp b/src/3rdparty/masm/yarr/YarrInterpreter.cpp index 16fc183cad..6eb6750dc4 100644 --- a/src/3rdparty/masm/yarr/YarrInterpreter.cpp +++ b/src/3rdparty/masm/yarr/YarrInterpreter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved. 
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without @@ -27,17 +27,15 @@ #include "config.h" #include "YarrInterpreter.h" +#include "Options.h" +#include "SuperSampler.h" #include "Yarr.h" -#include "YarrCanonicalizeUCS2.h" +#include "YarrCanonicalize.h" #include <wtf/BumpPointerAllocator.h> #include <wtf/DataLog.h> #include <wtf/text/CString.h> #include <wtf/text/WTFString.h> -#ifndef NDEBUG -#include <stdio.h> -#endif - using namespace WTF; namespace JSC { namespace Yarr { @@ -47,28 +45,6 @@ class Interpreter { public: struct ParenthesesDisjunctionContext; - struct BackTrackInfoPatternCharacter { - uintptr_t matchAmount; - }; - struct BackTrackInfoCharacterClass { - uintptr_t matchAmount; - }; - struct BackTrackInfoBackReference { - uintptr_t begin; // Not really needed for greedy quantifiers. - uintptr_t matchAmount; // Not really needed for fixed quantifiers. - }; - struct BackTrackInfoAlternative { - uintptr_t offset; - }; - struct BackTrackInfoParentheticalAssertion { - uintptr_t begin; - }; - struct BackTrackInfoParenthesesOnce { - uintptr_t begin; - }; - struct BackTrackInfoParenthesesTerminal { - uintptr_t begin; - }; struct BackTrackInfoParentheses { uintptr_t matchAmount; ParenthesesDisjunctionContext* lastContext; @@ -158,7 +134,7 @@ public: ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) { - size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + 
static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); @@ -171,10 +147,11 @@ public: class InputStream { public: - InputStream(const CharType* input, unsigned start, unsigned length) + InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs) : input(input) , pos(start) , length(length) + , decodeSurrogatePairs(decodeSurrogatePairs) { } @@ -208,13 +185,40 @@ public: RELEASE_ASSERT(pos >= negativePositionOffest); unsigned p = pos - negativePositionOffest; ASSERT(p < length); - return input[p]; + int result = input[p]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) { + if (atEnd()) + return -1; + + result = U16_GET_SUPPLEMENTARY(result, input[p + 1]); + next(); + } + return result; + } + + int readSurrogatePairChecked(unsigned negativePositionOffset) + { + RELEASE_ASSERT(pos >= negativePositionOffset); + unsigned p = pos - negativePositionOffset; + ASSERT(p < length); + if (p + 1 >= length) + return -1; + + int first = input[p]; + int second = input[p + 1]; + if (U16_IS_LEAD(first) && U16_IS_TRAIL(second)) + return U16_GET_SUPPLEMENTARY(first, second); + + return -1; } int reread(unsigned from) { ASSERT(from < length); - return input[from]; + int result = input[from]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1])) + result = U16_GET_SUPPLEMENTARY(result, input[from + 1]); + return result; } int prev() @@ -265,9 +269,9 @@ public: pos -= count; } - bool atStart(unsigned negativePositionOffest) + bool atStart(unsigned negativePositionOffset) { - return pos == negativePositionOffest; + return pos == negativePositionOffset; } bool atEnd(unsigned negativePositionOffest) @@ -285,24 +289,106 @@ public: const CharType* input; unsigned pos; unsigned 
length; + bool decodeSurrogatePairs; }; bool testCharacterClass(CharacterClass* characterClass, int ch) { - if (ch & 0xFF80) { - for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) - if (ch == characterClass->m_matchesUnicode[i]) + auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) { + for (unsigned i = 0; i < matches.size(); ++i) { + if (ch == matches[i]) + return true; + } + + return false; + }; + + auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) { + size_t low = 0; + size_t high = matches.size() - 1; + + while (low <= high) { + size_t mid = low + (high - low) / 2; + int diff = ch - matches[mid]; + if (!diff) + return true; + + if (diff < 0) { + if (mid == low) + return false; + high = mid - 1; + } else + low = mid + 1; + } + return false; + }; + + auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) { + for (unsigned i = 0; i < ranges.size(); ++i) { + if ((ch >= ranges[i].begin) && (ch <= ranges[i].end)) return true; - for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) - if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) + } + + return false; + }; + + auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) { + size_t low = 0; + size_t high = ranges.size() - 1; + + while (low <= high) { + size_t mid = low + (high - low) / 2; + int rangeBeginDiff = ch - ranges[mid].begin; + if (rangeBeginDiff >= 0 && ch <= ranges[mid].end) return true; + + if (rangeBeginDiff < 0) { + if (mid == low) + return false; + high = mid - 1; + } else + low = mid + 1; + } + return false; + }; + + if (characterClass->m_anyCharacter) + return true; + + const size_t thresholdForBinarySearch = 6; + + if (!isASCII(ch)) { + if (characterClass->m_matchesUnicode.size()) { + if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) { + if (binarySearchMatches(characterClass->m_matchesUnicode)) + return true; + } else if 
(linearSearchMatches(characterClass->m_matchesUnicode)) + return true; + } + + if (characterClass->m_rangesUnicode.size()) { + if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) { + if (binarySearchRanges(characterClass->m_rangesUnicode)) + return true; + } else if (linearSearchRanges(characterClass->m_rangesUnicode)) + return true; + } } else { - for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) - if (ch == characterClass->m_matches[i]) + if (characterClass->m_matches.size()) { + if (characterClass->m_matches.size() > thresholdForBinarySearch) { + if (binarySearchMatches(characterClass->m_matches)) + return true; + } else if (linearSearchMatches(characterClass->m_matches)) return true; - for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) - if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) + } + + if (characterClass->m_ranges.size()) { + if (characterClass->m_ranges.size() > thresholdForBinarySearch) { + if (binarySearchRanges(characterClass->m_ranges)) + return true; + } else if (linearSearchRanges(characterClass->m_ranges)) return true; + } } return false; @@ -313,6 +399,11 @@ public: return testChar == input.readChecked(negativeInputOffset); } + bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset) + { + return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset); + } + bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) { int ch = input.readChecked(negativeInputOffset); @@ -332,32 +423,31 @@ public: if (!input.checkInput(matchSize)) return false; - if (pattern->m_ignoreCase) { - for (unsigned i = 0; i < matchSize; ++i) { - int oldCh = input.reread(matchBegin + i); - int ch = input.readChecked(negativeInputOffset + matchSize - i); - - if (oldCh == ch) - continue; - - // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that - // unicode values are never allowed to match against ascii ones. 
- if (isASCII(oldCh) || isASCII(ch)) { + for (unsigned i = 0; i < matchSize; ++i) { + int oldCh = input.reread(matchBegin + i); + int ch; + if (!U_IS_BMP(oldCh)) { + ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i); + ++i; + } else + ch = input.readChecked(negativeInputOffset + matchSize - i); + + if (oldCh == ch) + continue; + + if (pattern->ignoreCase()) { + // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode + // patterns, Unicode values are never allowed to match against ASCII ones. + // For Unicode, we need to check all canonical equivalents of a character. + if (!unicode && (isASCII(oldCh) || isASCII(ch))) { if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) continue; - } else if (areCanonicallyEquivalent(oldCh, ch)) + } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2)) continue; - - input.uncheckInput(matchSize); - return false; - } - } else { - for (unsigned i = 0; i < matchSize; ++i) { - if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { - input.uncheckInput(matchSize); - return false; - } } + + input.uncheckInput(matchSize); + return false; } return true; @@ -365,15 +455,15 @@ public: bool matchAssertionBOL(ByteTerm& term) { - return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); + return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); } bool matchAssertionEOL(ByteTerm& term) { if (term.inputPosition) - return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); + return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, 
input.readChecked(term.inputPosition))); - return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); + return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read())); } bool matchAssertionWordBoundary(ByteTerm& term) @@ -400,18 +490,18 @@ public: case QuantifierGreedy: if (backTrack->matchAmount) { --backTrack->matchAmount; - input.uncheckInput(1); + input.uncheckInput(U16_LENGTH(term.atom.patternCharacter)); return true; } break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) return true; } - input.uncheckInput(backTrack->matchAmount); + input.setPos(backTrack->begin); break; } @@ -435,7 +525,7 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) return true; @@ -450,11 +540,24 @@ public: bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeCharacterClass); - BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); + BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); switch (term.atom.quantityType) { case QuantifierFixedCount: { - for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { + if (unicode) { + backTrack->begin = input.getPos(); + unsigned matchAmount = 0; + for (matchAmount = 0; 
matchAmount < term.atom.quantityMaxCount; ++matchAmount) { + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) { + input.setPos(backTrack->begin); + return false; + } + } + + return true; + } + + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) { if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) return false; } @@ -462,13 +565,16 @@ public: } case QuantifierGreedy: { + unsigned position = input.getPos(); + backTrack->begin = position; unsigned matchAmount = 0; - while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { - input.uncheckInput(1); + input.setPos(position); break; } ++matchAmount; + position = input.getPos(); } backTrack->matchAmount = matchAmount; @@ -476,6 +582,7 @@ public: } case QuantifierNonGreedy: + backTrack->begin = input.getPos(); backTrack->matchAmount = 0; return true; } @@ -487,14 +594,28 @@ public: bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeCharacterClass); - BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); + BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); switch (term.atom.quantityType) { case QuantifierFixedCount: + if (unicode) + input.setPos(backTrack->begin); break; case QuantifierGreedy: if (backTrack->matchAmount) { + if (unicode) { + // Rematch one less match + input.setPos(backTrack->begin); + --backTrack->matchAmount; + for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) { + if (!checkCharacterClass(term.atom.characterClass, 
term.invert(), term.inputPosition + 1)) { + input.uncheckInput(1); + break; + } + } + return true; + } --backTrack->matchAmount; input.uncheckInput(1); return true; @@ -502,12 +623,12 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) return true; } - input.uncheckInput(backTrack->matchAmount); + input.setPos(backTrack->begin); break; } @@ -539,7 +660,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: { backTrack->begin = input.getPos(); - for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) { if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { input.setPos(backTrack->begin); return false; @@ -550,7 +671,7 @@ public: case QuantifierGreedy: { unsigned matchAmount = 0; - while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) + while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) ++matchAmount; backTrack->matchAmount = matchAmount; return true; @@ -584,7 +705,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: - // for quantityCount == 1, could rewind. + // for quantityMaxCount == 1, could rewind. 
input.setPos(backTrack->begin); break; @@ -597,7 +718,7 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { ++backTrack->matchAmount; return true; } @@ -612,8 +733,8 @@ public: { if (term.capture()) { unsigned subpatternId = term.atom.subpatternId; - output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; - output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; + output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition; + output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition; } } void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) @@ -645,7 +766,7 @@ public: bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -675,11 +796,11 @@ public: bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); if (term.capture()) { unsigned subpatternId = term.atom.subpatternId; - output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; + output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition; } if (term.atom.quantityType == QuantifierFixedCount) @@ -692,7 +813,7 @@ public: bool backtrackParenthesesOnceBegin(ByteTerm& term, 
DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -711,6 +832,7 @@ public: return true; case QuantifierNonGreedy: ASSERT(backTrack->begin != notFound); + FALLTHROUGH; case QuantifierFixedCount: break; } @@ -721,7 +843,7 @@ public: bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -731,7 +853,7 @@ public: context->term -= term.atom.parenthesesWidth; return false; } - Q_FALLTHROUGH(); + FALLTHROUGH; case QuantifierNonGreedy: if (backTrack->begin == notFound) { backTrack->begin = input.getPos(); @@ -742,11 +864,12 @@ public: ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); unsigned subpatternId = term.atom.subpatternId; - output[subpatternId << 1] = input.getPos() + term.inputPosition; + output[subpatternId << 1] = input.getPos() - term.inputPosition; } context->term -= term.atom.parenthesesWidth; return true; } + FALLTHROUGH; case QuantifierFixedCount: break; } @@ -758,7 +881,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(term.atom.quantityMaxCount == quantifyInfinite); ASSERT(!term.capture()); BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + 
term.frameLocation); @@ -775,7 +898,7 @@ public: if (backTrack->begin == input.getPos()) return false; - // Successful match! Okay, what's next? - loop around and try to match moar! + // Successful match! Okay, what's next? - loop around and try to match more! context->term -= (term.atom.parenthesesWidth + 1); return true; } @@ -784,7 +907,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(term.atom.quantityMaxCount == quantifyInfinite); ASSERT(!term.capture()); // If we backtrack to this point, we have failed to match this iteration of the parens. @@ -804,7 +927,7 @@ public: bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -815,7 +938,7 @@ public: bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -833,7 +956,7 @@ public: bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); // We've failed to match parens; if they are inverted, this is win! 
if (term.invert()) { @@ -847,7 +970,7 @@ public: bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -867,36 +990,45 @@ public: backTrack->matchAmount = 0; backTrack->lastContext = 0; - switch (term.atom.quantityType) { - case QuantifierFixedCount: { + ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount); + + unsigned minimumMatchCount = term.atom.quantityMinCount; + JSRegExpResult fixedMatchResult; + + // Handle fixed matches and the minimum part of a variable length match. + if (minimumMatchCount) { // While we haven't yet reached our fixed limit, - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < minimumMatchCount) { // Try to do a match, and it it succeeds, add it to the list. ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); - if (result == JSRegExpMatch) + fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (fixedMatchResult == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { // The match failed; try to find an alternate point to carry on from. 
resetMatches(term, context); freeParenthesesDisjunctionContext(context); - - if (result != JSRegExpNoMatch) - return result; + + if (fixedMatchResult != JSRegExpNoMatch) + return fixedMatchResult; JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); if (backtrackResult != JSRegExpMatch) return backtrackResult; } } - ASSERT(backTrack->matchAmount == term.atom.quantityCount); ParenthesesDisjunctionContext* context = backTrack->lastContext; recordParenthesesMatch(term, context); + } + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); return JSRegExpMatch; } case QuantifierGreedy: { - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (result == JSRegExpMatch) @@ -946,7 +1078,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: { - ASSERT(backTrack->matchAmount == term.atom.quantityCount); + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); ParenthesesDisjunctionContext* context = 0; JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); @@ -955,7 +1087,7 @@ public: return result; // While we haven't yet reached our fixed limit, - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { // Try to do a match, and it it succeeds, add it to the list. 
context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); @@ -975,7 +1107,7 @@ public: } } - ASSERT(backTrack->matchAmount == term.atom.quantityCount); + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); context = backTrack->lastContext; recordParenthesesMatch(term, context); return JSRegExpMatch; @@ -988,7 +1120,7 @@ public: ParenthesesDisjunctionContext* context = backTrack->lastContext; JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); if (result == JSRegExpMatch) { - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (parenthesesResult == JSRegExpMatch) @@ -1008,7 +1140,7 @@ public: popParenthesesDisjunctionContext(backTrack); freeParenthesesDisjunctionContext(context); - if (result != JSRegExpNoMatch) + if (result != JSRegExpNoMatch || backTrack->matchAmount < term.atom.quantityMinCount) return result; } @@ -1021,7 +1153,7 @@ public: case QuantifierNonGreedy: { // If we've not reached the limit, try to add one more match. 
- if (backTrack->matchAmount < term.atom.quantityCount) { + if (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (result == JSRegExpMatch) { @@ -1070,16 +1202,23 @@ public: bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context) { UNUSED_PARAM(term); + + if (pattern->dotAll()) { + context->matchBegin = startOffset; + context->matchEnd = input.end(); + return true; + } + unsigned matchBegin = context->matchBegin; - if (matchBegin) { + if (matchBegin > startOffset) { for (matchBegin--; true; matchBegin--) { if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) { ++matchBegin; break; } - if (!matchBegin) + if (matchBegin == startOffset) break; } } @@ -1091,7 +1230,7 @@ public: if (((matchBegin && term.anchors.m_bol) || ((matchEnd != input.end()) && term.anchors.m_eol)) - && !pattern->m_multiline) + && !pattern->multiline()) return false; context->matchBegin = matchBegin; @@ -1156,21 +1295,37 @@ public: case ByteTerm::TypePatternCharacterOnce: case ByteTerm::TypePatternCharacterFixed: { - for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { - if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) + if (unicode) { + if (!U_IS_BMP(currentTerm().atom.patternCharacter)) { + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) { + BACKTRACK(); + } + } + MATCH_NEXT(); + } + } + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. 
+ + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) { + input.setPos(position); BACKTRACK(); + } } MATCH_NEXT(); } case ByteTerm::TypePatternCharacterGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); unsigned matchAmount = 0; - while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. + while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { - input.uncheckInput(1); + input.setPos(position); break; } ++matchAmount; + position = input.getPos(); } backTrack->matchAmount = matchAmount; @@ -1178,13 +1333,29 @@ public: } case ByteTerm::TypePatternCharacterNonGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + backTrack->begin = input.getPos(); backTrack->matchAmount = 0; MATCH_NEXT(); } case ByteTerm::TypePatternCasedCharacterOnce: case ByteTerm::TypePatternCasedCharacterFixed: { - for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { + if (unicode) { + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. + ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter)); + + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. 
+ + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) { + input.setPos(position); + BACKTRACK(); + } + } + MATCH_NEXT(); + } + + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) BACKTRACK(); } @@ -1192,8 +1363,12 @@ public: } case ByteTerm::TypePatternCasedCharacterGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. + ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); + unsigned matchAmount = 0; - while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { input.uncheckInput(1); break; @@ -1206,6 +1381,10 @@ public: } case ByteTerm::TypePatternCasedCharacterNonGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. 
+ ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); + backTrack->matchAmount = 0; MATCH_NEXT(); } @@ -1287,7 +1466,7 @@ public: if (offset > 0) MATCH_NEXT(); - if (input.atEnd()) + if (input.atEnd() || pattern->sticky()) return JSRegExpNoMatch; input.next(); @@ -1417,6 +1596,9 @@ public: if (!input.isAvailableInput(0)) return offsetNoMatch; + if (pattern->m_lock) + pattern->m_lock->lock(); + for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) output[i << 1] = offsetNoMatch; @@ -1436,23 +1618,31 @@ public: pattern->m_allocator->stopAllocator(); ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); + + if (pattern->m_lock) + pattern->m_lock->unlock(); + return output[0]; } Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) : pattern(pattern) + , unicode(pattern->unicode()) , output(output) - , input(input, start, length) + , input(input, start, length, pattern->unicode()) , allocatorPool(0) + , startOffset(start) , remainingMatchCount(matchLimit) { } private: BytecodePattern* pattern; + bool unicode; unsigned* output; InputStream input; BumpPointerPool* allocatorPool; + unsigned startOffset; unsigned remainingMatchCount; }; @@ -1474,13 +1664,18 @@ public: m_currentAlternativeIndex = 0; } - PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) + std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock) { regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); emitDisjunction(m_pattern.m_body); regexEnd(); - return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator)); +#ifndef NDEBUG + if (Options::dumpCompiledRegExpPatterns()) + dumpDisjunction(m_bodyDisjunction.get()); +#endif + + return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, 
allocator, lock); } void checkInput(unsigned count) @@ -1508,45 +1703,44 @@ public: m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); } - void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { - if (m_pattern.m_ignoreCase) { - UChar lo = Unicode::toLower(ch); - UChar hi = Unicode::toUpper(ch); + if (m_pattern.ignoreCase()) { + UChar32 lo = u_tolower(ch); + UChar32 hi = u_toupper(ch); if (lo != hi) { - m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); + m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType)); return; } } - m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); + m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType)); } - void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); - m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = 
frameLocation; } - void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { ASSERT(subpatternId); m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); - m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; } void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) { - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int beginTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + unsigned beginTerm = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; @@ -1559,8 +1753,7 @@ public: void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) { - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int beginTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + int beginTerm = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation 
= frameLocation; @@ -1577,8 +1770,7 @@ public: // then fix this up at the end! - simplifying this should make it much clearer. // https://bugs.webkit.org/show_bug.cgi?id=50136 - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int beginTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + int beginTerm = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; @@ -1591,8 +1783,7 @@ public: void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) { - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int beginTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + int beginTerm = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; @@ -1603,12 +1794,11 @@ public: m_currentAlternativeIndex = beginTerm + 1; } - void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - unsigned endTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + unsigned endTerm = m_bodyDisjunction->terms.size(); ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); @@ -1620,9 +1810,9 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; 
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } @@ -1634,8 +1824,7 @@ public: unsigned popParenthesesStack() { ASSERT(m_parenthesesStack.size()); - ASSERT(m_parenthesesStack.size() <= INT_MAX); - int stackEnd = static_cast<int>(m_parenthesesStack.size()) - 1; + int stackEnd = m_parenthesesStack.size() - 1; unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; m_parenthesesStack.shrink(stackEnd); @@ -1646,22 +1835,11 @@ public: return beginTerm; } -#ifndef NDEBUG - void dumpDisjunction(ByteDisjunction* disjunction) - { - dataLogF("ByteDisjunction(%p):\n\t", disjunction); - for (unsigned i = 0; i < disjunction->terms.size(); ++i) - dataLogF("{ %d } ", disjunction->terms[i].type); - dataLogF("\n"); - } -#endif - void closeAlternative(int beginTerm) { int origBeginTerm = beginTerm; ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int endIndex = static_cast<int>(m_bodyDisjunction->terms.size()); + int endIndex = m_bodyDisjunction->terms.size(); unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; @@ -1687,8 +1865,7 @@ public: int beginTerm = 0; int origBeginTerm = 0; ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int endIndex = static_cast<int>(m_bodyDisjunction->terms.size()); 
+ int endIndex = m_bodyDisjunction->terms.size(); unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; @@ -1705,12 +1882,11 @@ public: m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; } - void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) + void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - unsigned endTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + unsigned endTerm = m_bodyDisjunction->terms.size(); ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); @@ -1720,7 +1896,7 @@ public: unsigned subpatternId = parenthesesBegin.atom.subpatternId; unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; - OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); unsigned firstTermInParentheses = beginTerm + 1; parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2); @@ -1733,19 +1909,19 @@ public: m_bodyDisjunction->terms.shrink(beginTerm); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition)); - m_allParenthesesInfo.append(parenthesesDisjunction.release()); + m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction)); - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + 
m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; } - void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - unsigned endTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + unsigned endTerm = m_bodyDisjunction->terms.size(); ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); @@ -1757,18 +1933,19 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } - void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void 
atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - unsigned endTerm = static_cast<int>(m_bodyDisjunction->terms.size()); + unsigned endTerm = m_bodyDisjunction->terms.size(); ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); @@ -1780,15 +1957,17 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) { - m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); m_bodyDisjunction->terms[0].frameLocation = 0; m_currentAlternativeIndex = 0; @@ -1801,8 +1980,7 @@ public: void alternativeBodyDisjunction(bool onceThrough) { - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int newAlternativeIndex = 
static_cast<int>(m_bodyDisjunction->terms.size()); + int newAlternativeIndex = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); @@ -1811,8 +1989,7 @@ public: void alternativeDisjunction() { - ASSERT(m_bodyDisjunction->terms.size() <= INT_MAX); - int newAlternativeIndex = static_cast<int>(m_bodyDisjunction->terms.size()); + int newAlternativeIndex = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); @@ -1842,9 +2019,7 @@ public: currentCountAlreadyChecked += countToCheck; } - for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { - PatternTerm& term = alternative->m_terms[i]; - + for (auto& term : alternative->m_terms) { switch (term.type) { case PatternTerm::TypeAssertionBOL: assertionBOL(currentCountAlreadyChecked - term.inputPosition); @@ -1859,15 +2034,15 @@ public: break; case PatternTerm::TypePatternCharacter: - atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeCharacterClass: - atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeBackReference: - atomBackReference(term.backReferenceSubpatternId, 
currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeForwardReference: @@ -1875,27 +2050,30 @@ public: case PatternTerm::TypeParenthesesSubpattern: { unsigned disjunctionAlreadyCheckedCount = 0; - if (term.quantityCount == 1 && !term.parentheses.isCopy) { + if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) { unsigned alternativeFrameLocation = term.frameLocation; // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. if (term.quantityType == QuantifierFixedCount) disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; else alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; - unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); - atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType); } else if (term.parentheses.isTerminal) { - unsigned delegateEndInputOffset = term.inputPosition - 
currentCountAlreadyChecked; - atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); - atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType); } else { - unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); - atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); + atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, 
term.parentheses.disjunction->m_callFrameSize); } break; } @@ -1903,8 +2081,8 @@ public: case PatternTerm::TypeParentheticalAssertion: { unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; - ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition)); - unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition; unsigned uncheckAmount = 0; if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; @@ -1914,7 +2092,7 @@ public: atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); - atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); + atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType); if (uncheckAmount) { checkInput(uncheckAmount); currentCountAlreadyChecked += uncheckAmount; @@ -1929,22 +2107,283 @@ public: } } } +#ifndef NDEBUG + void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = 0) + { + PrintStream& out = WTF::dataFile(); + + unsigned termIndexNest = 0; + + if (!nesting) { + out.printf("ByteDisjunction(%p):\n", disjunction); + nesting = 1; + } else { + termIndexNest = nesting - 1; + nesting = 2; + } + + auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) { + for (unsigned nestingDepth = 0; nestingDepth < termIndexNest; nestingDepth++) + out.print(" "); + out.printf("%4zu", index); + for (unsigned nestingDepth = 0; nestingDepth < termNesting; nestingDepth++) + out.print(" "); + }; + + auto dumpQuantity = 
[&](ByteTerm& term) { + if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == 1 && term.atom.quantityMaxCount == 1) + return; + + out.print(" {", term.atom.quantityMinCount); + if (term.atom.quantityMinCount != term.atom.quantityMaxCount) { + if (term.atom.quantityMaxCount == UINT_MAX) + out.print(",inf"); + else + out.print(",", term.atom.quantityMaxCount); + } + out.print("}"); + if (term.atom.quantityType == QuantifierGreedy) + out.print(" greedy"); + else if (term.atom.quantityType == QuantifierNonGreedy) + out.print(" non-greedy"); + }; + + auto dumpCaptured = [&](ByteTerm& term) { + if (term.capture()) + out.print(" captured (#", term.atom.subpatternId, ")"); + }; + + auto dumpInverted = [&](ByteTerm& term) { + if (term.invert()) + out.print(" inverted"); + }; + + auto dumpInputPosition = [&](ByteTerm& term) { + out.printf(" inputPosition %u", term.inputPosition); + }; + + auto dumpFrameLocation = [&](ByteTerm& term) { + out.printf(" frameLocation %u", term.frameLocation); + }; + + auto dumpCharacter = [&](ByteTerm& term) { + out.print(" "); + dumpUChar32(out, term.atom.patternCharacter); + }; + + auto dumpCharClass = [&](ByteTerm& term) { + out.print(" "); + dumpCharacterClass(out, &m_pattern, term.atom.characterClass); + }; + + for (size_t idx = 0; idx < disjunction->terms.size(); ++idx) { + ByteTerm term = disjunction->terms[idx]; + + bool outputNewline = true; + + switch (term.type) { + case ByteTerm::TypeBodyAlternativeBegin: + outputTermIndexAndNest(idx, nesting++); + out.print("BodyAlternativeBegin"); + if (term.alternative.onceThrough) + out.print(" onceThrough"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeBodyAlternativeDisjunction: + outputTermIndexAndNest(idx, nesting - 1); + out.print("BodyAlternativeDisjunction"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeBodyAlternativeEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("BodyAlternativeEnd"); + dumpFrameLocation(term); + break; + 
case ByteTerm::TypeAlternativeBegin: + outputTermIndexAndNest(idx, nesting++); + out.print("AlternativeBegin"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeAlternativeDisjunction: + outputTermIndexAndNest(idx, nesting - 1); + out.print("AlternativeDisjunction"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeAlternativeEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("AlternativeEnd"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeSubpatternBegin: + outputTermIndexAndNest(idx, nesting++); + out.print("SubpatternBegin"); + break; + case ByteTerm::TypeSubpatternEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("SubpatternEnd"); + break; + case ByteTerm::TypeAssertionBOL: + outputTermIndexAndNest(idx, nesting); + out.print("AssertionBOL"); + break; + case ByteTerm::TypeAssertionEOL: + outputTermIndexAndNest(idx, nesting); + out.print("AssertionEOL"); + break; + case ByteTerm::TypeAssertionWordBoundary: + outputTermIndexAndNest(idx, nesting); + out.print("AssertionWordBoundary"); + break; + case ByteTerm::TypePatternCharacterOnce: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCharacterOnce"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + dumpCharacter(term); + dumpQuantity(term); + break; + case ByteTerm::TypePatternCharacterFixed: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCharacterFixed"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + dumpCharacter(term); + out.print(" {", term.atom.quantityMinCount, "}"); + break; + case ByteTerm::TypePatternCharacterGreedy: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCharacterGreedy"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + dumpCharacter(term); + dumpQuantity(term); + break; + case ByteTerm::TypePatternCharacterNonGreedy: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCharacterNonGreedy"); + dumpInverted(term); + 
dumpInputPosition(term); + dumpFrameLocation(term); + dumpCharacter(term); + dumpQuantity(term); + break; + case ByteTerm::TypePatternCasedCharacterOnce: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCasedCharacterOnce"); + break; + case ByteTerm::TypePatternCasedCharacterFixed: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCasedCharacterFixed"); + break; + case ByteTerm::TypePatternCasedCharacterGreedy: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCasedCharacterGreedy"); + break; + case ByteTerm::TypePatternCasedCharacterNonGreedy: + outputTermIndexAndNest(idx, nesting); + out.print("PatternCasedCharacterNonGreedy"); + break; + case ByteTerm::TypeCharacterClass: + outputTermIndexAndNest(idx, nesting); + out.print("CharacterClass"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + dumpCharClass(term); + dumpQuantity(term); + break; + case ByteTerm::TypeBackReference: + outputTermIndexAndNest(idx, nesting); + out.print("BackReference #", term.atom.subpatternId); + dumpQuantity(term); + break; + case ByteTerm::TypeParenthesesSubpattern: + outputTermIndexAndNest(idx, nesting); + out.print("ParenthesesSubpattern"); + dumpCaptured(term); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + dumpQuantity(term); + out.print("\n"); + outputNewline = false; + dumpDisjunction(term.atom.parenthesesDisjunction, nesting); + break; + case ByteTerm::TypeParenthesesSubpatternOnceBegin: + outputTermIndexAndNest(idx, nesting++); + out.print("ParenthesesSubpatternOnceBegin"); + dumpCaptured(term); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + break; + case ByteTerm::TypeParenthesesSubpatternOnceEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("ParenthesesSubpatternOnceEnd"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeParenthesesSubpatternTerminalBegin: + outputTermIndexAndNest(idx, nesting++); + 
out.print("ParenthesesSubpatternTerminalBegin"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + break; + case ByteTerm::TypeParenthesesSubpatternTerminalEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("ParenthesesSubpatternTerminalEnd"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeParentheticalAssertionBegin: + outputTermIndexAndNest(idx, nesting++); + out.print("ParentheticalAssertionBegin"); + dumpInverted(term); + dumpInputPosition(term); + dumpFrameLocation(term); + break; + case ByteTerm::TypeParentheticalAssertionEnd: + outputTermIndexAndNest(idx, --nesting); + out.print("ParentheticalAssertionEnd"); + dumpFrameLocation(term); + break; + case ByteTerm::TypeCheckInput: + outputTermIndexAndNest(idx, nesting); + out.print("CheckInput ", term.checkInputCount); + break; + case ByteTerm::TypeUncheckInput: + outputTermIndexAndNest(idx, nesting); + out.print("UncheckInput ", term.checkInputCount); + break; + case ByteTerm::TypeDotStarEnclosure: + outputTermIndexAndNest(idx, nesting); + out.print("DotStarEnclosure"); + break; + } + if (outputNewline) + out.print("\n"); + } + } +#endif private: YarrPattern& m_pattern; - OwnPtr<ByteDisjunction> m_bodyDisjunction; + std::unique_ptr<ByteDisjunction> m_bodyDisjunction; unsigned m_currentAlternativeIndex; Vector<ParenthesesStackEntry> m_parenthesesStack; - Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; }; -PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) +std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock) { - return ByteCompiler(pattern).compile(allocator); + return ByteCompiler(pattern).compile(allocator, lock); } unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); if (input.is8Bit()) 
return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret(); return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret(); @@ -1952,22 +2391,24 @@ unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned star unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); return Interpreter<LChar>(bytecode, output, input, length, start).interpret(); } unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); return Interpreter<UChar>(bytecode, output, input, length, start).interpret(); } // These should be the same for both UChar & LChar. -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), 
CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); -COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); +COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); +COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); +COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); +COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); +COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); +COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); } } diff --git a/src/3rdparty/masm/yarr/YarrInterpreter.h b/src/3rdparty/masm/yarr/YarrInterpreter.h index 3b44acbd2b..a319cb3461 100644 --- a/src/3rdparty/masm/yarr/YarrInterpreter.h +++ b/src/3rdparty/masm/yarr/YarrInterpreter.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2010-2012, 2014, 2016 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,12 +23,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef YarrInterpreter_h -#define YarrInterpreter_h +#pragma once +#include "ConcurrentJSLock.h" #include "YarrPattern.h" -#include <wtf/PassOwnPtr.h> -#include <wtf/unicode/Unicode.h> namespace WTF { class BumpPointerAllocator; @@ -76,10 +74,10 @@ struct ByteTerm { union { struct { union { - UChar patternCharacter; + UChar32 patternCharacter; struct { - UChar lo; - UChar hi; + UChar32 lo; + UChar32 hi; } casedCharacter; CharacterClass* characterClass; unsigned subpatternId; @@ -89,7 +87,8 @@ struct ByteTerm { unsigned parenthesesWidth; }; QuantifierType quantityType; - unsigned quantityCount; + unsigned quantityMinCount; + unsigned quantityMaxCount; } atom; struct { int next; @@ -107,11 +106,17 @@ struct ByteTerm { bool m_invert : 1; unsigned inputPosition; - ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + ByteTerm(UChar32 ch, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) , m_capture(false) , m_invert(false) { + atom.patternCharacter = ch; + atom.quantityType = quantityType; + atom.quantityMinCount = quantityCount.unsafeGet(); + atom.quantityMaxCount = quantityCount.unsafeGet(); + inputPosition = inputPos; + switch (quantityType) { case QuantifierFixedCount: type = (quantityCount == 1) ? 
ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; @@ -123,14 +128,9 @@ struct ByteTerm { type = ByteTerm::TypePatternCharacterNonGreedy; break; } - - atom.patternCharacter = ch; - atom.quantityType = quantityType; - atom.quantityCount = quantityCount.unsafeGet(); - inputPosition = inputPos; } - ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + ByteTerm(UChar32 lo, UChar32 hi, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) , m_capture(false) , m_invert(false) @@ -150,22 +150,24 @@ struct ByteTerm { atom.casedCharacter.lo = lo; atom.casedCharacter.hi = hi; atom.quantityType = quantityType; - atom.quantityCount = quantityCount.unsafeGet(); + atom.quantityMinCount = quantityCount.unsafeGet(); + atom.quantityMaxCount = quantityCount.unsafeGet(); inputPosition = inputPos; } - ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) + ByteTerm(CharacterClass* characterClass, bool invert, unsigned inputPos) : type(ByteTerm::TypeCharacterClass) , m_capture(false) , m_invert(invert) { atom.characterClass = characterClass; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } - ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) + ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, unsigned inputPos) : type(type) , m_capture(capture) , m_invert(false) @@ -173,7 +175,8 @@ struct ByteTerm { atom.subpatternId = subpatternId; atom.parenthesesDisjunction = parenthesesInfo; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } @@ -183,21 +186,23 @@ struct ByteTerm { , m_invert(invert) { 
atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; } - ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) + ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, unsigned inputPos) : type(type) , m_capture(capture) , m_invert(invert) { atom.subpatternId = subpatternId; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } - static ByteTerm BOL(int inputPos) + static ByteTerm BOL(unsigned inputPos) { ByteTerm term(TypeAssertionBOL); term.inputPosition = inputPos; @@ -218,21 +223,21 @@ struct ByteTerm { return term; } - static ByteTerm EOL(int inputPos) + static ByteTerm EOL(unsigned inputPos) { ByteTerm term(TypeAssertionEOL); term.inputPosition = inputPos; return term; } - static ByteTerm WordBoundary(bool invert, int inputPos) + static ByteTerm WordBoundary(bool invert, unsigned inputPos) { ByteTerm term(TypeAssertionWordBoundary, invert); term.inputPosition = inputPos; return term; } - static ByteTerm BackReference(unsigned subpatternId, int inputPos) + static ByteTerm BackReference(unsigned subpatternId, unsigned inputPos) { return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); } @@ -329,6 +334,8 @@ public: { } + size_t estimatedSizeInBytes() const { return terms.capacity() * sizeof(ByteTerm); } + Vector<ByteTerm> terms; unsigned m_numSubpatterns; unsigned m_frameSize; @@ -337,16 +344,19 @@ public: struct BytecodePattern { WTF_MAKE_FAST_ALLOCATED; public: - BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction> >& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) - : m_body(body) - , m_ignoreCase(pattern.m_ignoreCase) - , m_multiline(pattern.m_multiline) + BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& 
parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock) + : m_body(WTFMove(body)) + , m_flags(pattern.m_flags) , m_allocator(allocator) + , m_lock(lock) { m_body->terms.shrinkToFit(); newlineCharacterClass = pattern.newlineCharacterClass(); - wordcharCharacterClass = pattern.wordcharCharacterClass(); + if (unicode() && ignoreCase()) + wordcharCharacterClass = pattern.wordUnicodeIgnoreCaseCharCharacterClass(); + else + wordcharCharacterClass = pattern.wordcharCharacterClass(); m_allParenthesesInfo.swap(parenthesesInfoToAdopt); m_allParenthesesInfo.shrinkToFit(); @@ -355,26 +365,32 @@ public: m_userCharacterClasses.shrinkToFit(); } - OwnPtr<ByteDisjunction> m_body; - bool m_ignoreCase; - bool m_multiline; + size_t estimatedSizeInBytes() const { return m_body->estimatedSizeInBytes(); } + + bool ignoreCase() const { return m_flags & FlagIgnoreCase; } + bool multiline() const { return m_flags & FlagMultiline; } + bool sticky() const { return m_flags & FlagSticky; } + bool unicode() const { return m_flags & FlagUnicode; } + bool dotAll() const { return m_flags & FlagDotAll; } + + std::unique_ptr<ByteDisjunction> m_body; + RegExpFlags m_flags; // Each BytecodePattern is associated with a RegExp, each RegExp is associated - // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator. + // with a VM. Cache a pointer to out VM's m_regExpAllocator. 
BumpPointerAllocator* m_allocator; + ConcurrentJSLock* m_lock; CharacterClass* newlineCharacterClass; CharacterClass* wordcharCharacterClass; private: - Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo; - Vector<OwnPtr<CharacterClass> > m_userCharacterClasses; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; }; -JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); +JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*, ConcurrentJSLock* = nullptr); JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); } } // namespace JSC::Yarr - -#endif // YarrInterpreter_h diff --git a/src/3rdparty/masm/yarr/YarrJIT.cpp b/src/3rdparty/masm/yarr/YarrJIT.cpp index 71123b7be7..ce7c7163ed 100644 --- a/src/3rdparty/masm/yarr/YarrJIT.cpp +++ b/src/3rdparty/masm/yarr/YarrJIT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2009-2018 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,22 +25,23 @@ #include "config.h" #include "YarrJIT.h" + #include <wtf/ASCIICType.h> +#include "LinkBuffer.h" #include "Options.h" +#include "VM.h" #include "Yarr.h" -#include "YarrCanonicalizeUCS2.h" +#include "YarrCanonicalize.h" #if ENABLE(YARR_JIT) -#include "LinkBuffer.h" - using namespace WTF; namespace JSC { namespace Yarr { template<YarrJITCompileMode compileMode> class YarrGenerator : private DefaultMacroAssembler { - friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); + friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); #if CPU(ARM) static const RegisterID input = ARMRegisters::r0; @@ -50,20 +51,38 @@ class YarrGenerator : private DefaultMacroAssembler { static const RegisterID regT0 = ARMRegisters::r4; static const RegisterID regT1 = ARMRegisters::r5; + static const RegisterID initialStart = ARMRegisters::r8; static const RegisterID returnRegister = ARMRegisters::r0; static const RegisterID returnRegister2 = ARMRegisters::r1; + +#define HAVE_INITIAL_START_REG #elif CPU(ARM64) + // Argument registers static const RegisterID input = ARM64Registers::x0; static const RegisterID index = ARM64Registers::x1; static const RegisterID length = ARM64Registers::x2; static const RegisterID output = ARM64Registers::x3; - - static const RegisterID regT0 = ARM64Registers::x4; - static const RegisterID regT1 = ARM64Registers::x5; + static const RegisterID freelistRegister = ARM64Registers::x4; + static const RegisterID freelistSizeRegister = ARM64Registers::x5; + + // Scratch registers + static const RegisterID regT0 = ARM64Registers::x6; + static const RegisterID regT1 = ARM64Registers::x7; + static const RegisterID 
regT2 = ARM64Registers::x8; + static const RegisterID remainingMatchCount = ARM64Registers::x9; + static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10; + static const RegisterID initialStart = ARM64Registers::x11; + static const RegisterID supplementaryPlanesBase = ARM64Registers::x12; + static const RegisterID surrogateTagMask = ARM64Registers::x13; + static const RegisterID leadingSurrogateTag = ARM64Registers::x14; + static const RegisterID trailingSurrogateTag = ARM64Registers::x15; static const RegisterID returnRegister = ARM64Registers::x0; static const RegisterID returnRegister2 = ARM64Registers::x1; + +#define HAVE_INITIAL_START_REG +#define JIT_UNICODE_EXPRESSIONS #elif CPU(MIPS) static const RegisterID input = MIPSRegisters::a0; static const RegisterID index = MIPSRegisters::a1; @@ -72,20 +91,12 @@ class YarrGenerator : private DefaultMacroAssembler { static const RegisterID regT0 = MIPSRegisters::t4; static const RegisterID regT1 = MIPSRegisters::t5; + static const RegisterID initialStart = MIPSRegisters::t6; static const RegisterID returnRegister = MIPSRegisters::v0; static const RegisterID returnRegister2 = MIPSRegisters::v1; -#elif CPU(SH4) - static const RegisterID input = SH4Registers::r4; - static const RegisterID index = SH4Registers::r5; - static const RegisterID length = SH4Registers::r6; - static const RegisterID output = SH4Registers::r7; - static const RegisterID regT0 = SH4Registers::r0; - static const RegisterID regT1 = SH4Registers::r1; - - static const RegisterID returnRegister = SH4Registers::r0; - static const RegisterID returnRegister2 = SH4Registers::r1; +#define HAVE_INITIAL_START_REG #elif CPU(X86) static const RegisterID input = X86Registers::eax; static const RegisterID index = X86Registers::edx; @@ -99,10 +110,13 @@ class YarrGenerator : private DefaultMacroAssembler { static const RegisterID returnRegister2 = X86Registers::edx; #elif CPU(X86_64) #if !OS(WINDOWS) + // Argument registers static const RegisterID 
input = X86Registers::edi; static const RegisterID index = X86Registers::esi; static const RegisterID length = X86Registers::edx; static const RegisterID output = X86Registers::ecx; + static const RegisterID freelistRegister = X86Registers::r8; + static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization. #else // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted. // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx @@ -113,11 +127,186 @@ class YarrGenerator : private DefaultMacroAssembler { static const RegisterID output = X86Registers::r10; #endif + // Scratch registers static const RegisterID regT0 = X86Registers::eax; - static const RegisterID regT1 = X86Registers::ebx; +#if !OS(WINDOWS) + static const RegisterID regT1 = X86Registers::r9; + static const RegisterID regT2 = X86Registers::r10; +#else + static const RegisterID regT1 = X86Registers::ecx; + static const RegisterID regT2 = X86Registers::edi; +#endif + + static const RegisterID initialStart = X86Registers::ebx; +#if !OS(WINDOWS) + static const RegisterID remainingMatchCount = X86Registers::r12; +#else + static const RegisterID remainingMatchCount = X86Registers::esi; +#endif + static const RegisterID regUnicodeInputAndTrail = X86Registers::r13; + static const RegisterID leadingSurrogateTag = X86Registers::r14; + static const RegisterID trailingSurrogateTag = X86Registers::r15; static const RegisterID returnRegister = X86Registers::eax; static const RegisterID returnRegister2 = X86Registers::edx; + + const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000); + const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00); +#define HAVE_INITIAL_START_REG +#define JIT_UNICODE_EXPRESSIONS +#endif + +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + struct ParenContextSizes { + size_t m_numSubpatterns; + size_t m_frameSlots; + + ParenContextSizes(size_t numSubpatterns, size_t frameSlots) + : 
m_numSubpatterns(numSubpatterns) + , m_frameSlots(frameSlots) + { + } + + size_t numSubpatterns() { return m_numSubpatterns; } + + size_t frameSlots() { return m_frameSlots; } + }; + + struct ParenContext { + struct ParenContext* next; + uint32_t begin; + uint32_t matchAmount; + uintptr_t returnAddress; + struct Subpatterns { + unsigned start; + unsigned end; + } subpatterns[0]; + uintptr_t frameSlots[0]; + + static size_t sizeFor(ParenContextSizes& parenContextSizes) + { + return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots(); + } + + static ptrdiff_t nextOffset() + { + return offsetof(ParenContext, next); + } + + static ptrdiff_t beginOffset() + { + return offsetof(ParenContext, begin); + } + + static ptrdiff_t matchAmountOffset() + { + return offsetof(ParenContext, matchAmount); + } + + static ptrdiff_t returnAddressOffset() + { + return offsetof(ParenContext, returnAddress); + } + + static ptrdiff_t subpatternOffset(size_t subpattern) + { + return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns); + } + + static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes) + { + return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns); + } + }; + + void initParenContextFreeList() + { + RegisterID parenContextPointer = regT0; + RegisterID nextParenContextPointer = regT2; + + size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes); + + parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize); + + // Check that the paren context is a reasonable size. 
+ if (parenContextSize > INT16_MAX) + m_abortExecution.append(jump()); + + Jump emptyFreeList = branchTestPtr(Zero, freelistRegister); + move(freelistRegister, parenContextPointer); + addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer); + addPtr(freelistRegister, freelistSizeRegister); + subPtr(TrustedImm32(parenContextSize), freelistSizeRegister); + + Label loopTop(this); + Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister); + storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset())); + move(nextParenContextPointer, parenContextPointer); + addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer); + jump(loopTop); + + initDone.link(this); + storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset())); + emptyFreeList.link(this); + } + + void allocateParenContext(RegisterID result) + { + m_abortExecution.append(branchTestPtr(Zero, freelistRegister)); + sub32(TrustedImm32(1), remainingMatchCount); + m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount)); + move(freelistRegister, result); + loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister); + } + + void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister) + { + loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister); + storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset())); + move(headPtrRegister, freelistRegister); + } + + void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation) + { + store32(index, Address(parenContextReg, ParenContext::beginOffset())); + loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg); + store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset())); + 
loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg); + storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset())); + if (compileMode == IncludeSubpatterns) { + for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) { + loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg); + storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern))); + clearSubpatternStart(subpattern); + } + } + subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses; + for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) { + loadFromFrame(frameLocation, tempReg); + storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t))); + } + } + + void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation) + { + load32(Address(parenContextReg, ParenContext::beginOffset()), index); + storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex()); + load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg); + storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); + loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg); + storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex()); + if (compileMode == IncludeSubpatterns) { + for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) { + loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg); + storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned))); + } + } + subpatternBaseFrameLocation += 
YarrStackSpaceForBackTrackInfoParentheses; + for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) { + loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg); + storeToFrame(tempReg, frameLocation); + } + } #endif void optimizeAlternative(PatternAlternative* alternative) @@ -129,8 +318,10 @@ class YarrGenerator : private DefaultMacroAssembler { PatternTerm& term = alternative->m_terms[i]; PatternTerm& nextTerm = alternative->m_terms[i + 1]; + // We can move BMP only character classes after fixed character terms. if ((term.type == PatternTerm::TypeCharacterClass) && (term.quantityType == QuantifierFixedCount) + && (!m_decodeSurrogatePairs || (!term.characterClass->m_hasNonBMPCharacters && !term.m_invert)) && (nextTerm.type == PatternTerm::TypePatternCharacter) && (nextTerm.quantityType == QuantifierFixedCount)) { PatternTerm termCopy = term; @@ -140,7 +331,7 @@ class YarrGenerator : private DefaultMacroAssembler { } } - void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) + void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount) { do { // pick which range we're going to generate @@ -189,26 +380,28 @@ class YarrGenerator : private DefaultMacroAssembler { void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) { - if (charClass->m_table) { + if (charClass->m_table && !m_decodeSurrogatePairs) { ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table)); matchDest.append(branchTest8(charClass->m_tableInverted ? 
Zero : NonZero, tableEntry)); return; } - Jump unicodeFail; + JumpList unicodeFail; if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { - Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); + JumpList isAscii; + if (charClass->m_matches.size() || charClass->m_ranges.size()) + isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f))); if (charClass->m_matchesUnicode.size()) { for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { - UChar ch = charClass->m_matchesUnicode[i]; + UChar32 ch = charClass->m_matchesUnicode[i]; matchDest.append(branch32(Equal, character, Imm32(ch))); } } if (charClass->m_rangesUnicode.size()) { for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { - UChar lo = charClass->m_rangesUnicode[i].begin; - UChar hi = charClass->m_rangesUnicode[i].end; + UChar32 lo = charClass->m_rangesUnicode[i].begin; + UChar32 hi = charClass->m_rangesUnicode[i].end; Jump below = branch32(LessThan, character, Imm32(lo)); matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); @@ -216,18 +409,16 @@ class YarrGenerator : private DefaultMacroAssembler { } } - unicodeFail = jump(); + if (charClass->m_matches.size() || charClass->m_ranges.size()) + unicodeFail = jump(); isAscii.link(this); } if (charClass->m_ranges.size()) { unsigned matchIndex = 0; JumpList failures; - ASSERT(charClass->m_ranges.size() <= UINT_MAX); - matchCharacterClassRange(character, failures, matchDest, &charClass->m_ranges[0], - static_cast<unsigned>(charClass->m_ranges.size()), - &matchIndex, charClass->m_matches.isEmpty() ? 
0 : &charClass->m_matches[0], - static_cast<unsigned>(charClass->m_matches.size())); + matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.data(), charClass->m_ranges.size(), + &matchIndex, charClass->m_matches.data(), charClass->m_matches.size()); while (matchIndex < charClass->m_matches.size()) matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); @@ -238,7 +429,7 @@ class YarrGenerator : private DefaultMacroAssembler { for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { char ch = charClass->m_matches[i]; - if (m_pattern.m_ignoreCase) { + if (m_pattern.ignoreCase()) { if (isASCIILower(ch)) { matchesAZaz.append(ch); continue; @@ -249,8 +440,7 @@ class YarrGenerator : private DefaultMacroAssembler { matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); } - ASSERT(matchesAZaz.size() <= UINT_MAX); - if (unsigned countAZaz = static_cast<int>(matchesAZaz.size())) { + if (unsigned countAZaz = matchesAZaz.size()) { or32(TrustedImm32(32), character); for (unsigned i = 0; i < countAZaz; ++i) matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); @@ -290,29 +480,102 @@ class YarrGenerator : private DefaultMacroAssembler { return branch32(NotEqual, index, length); } - Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character) + BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index) { - readCharacter(inputPosition, character); - - // For case-insesitive compares, non-ascii characters that have different - // upper & lower case representations are converted to a character class. - ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { - or32(TrustedImm32(0x20), character); - ch |= 0x20; + RegisterID base = input; + + // BaseIndex() addressing can take a int32_t offset. 
Given that we can have a regular + // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient + // for addressing in extreme cases where we might underflow. Therefore we check to see if + // negativeCharacterOffset will underflow directly or after converting for 16 bit characters. + // If so, we do our own address calculating by adjusting the base, using the result register + // as a temp address register. + unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff; + unsigned offsetAdjustAmount = 0x40000000; + if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) { + base = tempReg; + move(input, base); + while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) { + subPtr(TrustedImm32(offsetAdjustAmount), base); + if (m_charSize != Char8) + subPtr(TrustedImm32(offsetAdjustAmount), base); + negativeCharacterOffset -= offsetAdjustAmount; + } } - return branch32(NotEqual, character, Imm32(ch)); + Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet())); + + if (m_charSize == Char8) + return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet()); + + return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet()); + } + +#ifdef JIT_UNICODE_EXPRESSIONS + void tryReadUnicodeCharImpl(RegisterID resultReg) + { + ASSERT(m_charSize == Char16); + + JumpList notUnicode; + load16Unaligned(regUnicodeInputAndTrail, resultReg); + and32(surrogateTagMask, resultReg, regT2); + notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag)); + addPtr(TrustedImm32(2), regUnicodeInputAndTrail); + getEffectiveAddress(BaseIndex(input, length, TimesTwo), regT2); + notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2)); + load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail); + 
and32(surrogateTagMask, regUnicodeInputAndTrail, regT2); + notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag)); + sub32(leadingSurrogateTag, resultReg); + sub32(trailingSurrogateTag, regUnicodeInputAndTrail); + lshift32(TrustedImm32(10), resultReg); + or32(regUnicodeInputAndTrail, resultReg); + add32(supplementaryPlanesBase, resultReg); + notUnicode.link(this); + } + + void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg) + { + ASSERT(m_charSize == Char16); + + getEffectiveAddress(address, regUnicodeInputAndTrail); + + if (resultReg == regT0) + m_tryReadUnicodeCharacterCalls.append(nearCall()); + else + tryReadUnicodeCharImpl(resultReg); } +#endif - void readCharacter(int inputPosition, RegisterID reg) + void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index) { + BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg); + if (m_charSize == Char8) - load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg); + load8(address, resultReg); +#ifdef JIT_UNICODE_EXPRESSIONS + else if (m_decodeSurrogatePairs) + tryReadUnicodeChar(address, resultReg); +#endif else - load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); + load16Unaligned(address, resultReg); } + Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character) + { + readCharacter(negativeCharacterOffset, character); + + // For case-insesitive compares, non-ascii characters that have different + // upper & lower case representations are converted to a character class. 
+ ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode)); + if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) { + or32(TrustedImm32(0x20), character); + ch |= 0x20; + } + + return branch32(NotEqual, character, Imm32(ch)); + } + void storeToFrame(RegisterID reg, unsigned frameLocation) { poke(reg, frameLocation); @@ -323,9 +586,16 @@ class YarrGenerator : private DefaultMacroAssembler { poke(imm, frameLocation); } +#if CPU(ARM64) || CPU(X86_64) + void storeToFrame(TrustedImmPtr imm, unsigned frameLocation) + { + poke(imm, frameLocation); + } +#endif + DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) { - return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); + return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*))); } void loadFromFrame(unsigned frameLocation, RegisterID reg) @@ -340,32 +610,82 @@ class YarrGenerator : private DefaultMacroAssembler { unsigned alignCallFrameSizeInBytes(unsigned callFrameSize) { + if (!callFrameSize) + return 0; + callFrameSize *= sizeof(void*); if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize) CRASH(); - // Originally, the code was: -// callFrameSize = (callFrameSize + 0x3f) & ~0x3f; - // However, 64 bytes is a bit surprising. The biggest "alignment" requirement is on Aarch64, where: - // "SP mod 16 = 0. The stack must be quad-word aligned." 
(IHI0055B_aapcs64.pdf) - callFrameSize = (callFrameSize + 0xf) & ~0xf; - if (!callFrameSize) - CRASH(); + callFrameSize = (callFrameSize + 0x3f) & ~0x3f; return callFrameSize; } void initCallFrame() { - unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; - if (callFrameSize) - subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); + unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize); + if (callFrameSizeInBytes) { +#if CPU(X86_64) || CPU(ARM64) + if (Options::zeroStackFrame()) { + // We need to start from the stack pointer, because we could have spilled callee saves + move(stackPointerRegister, regT0); + subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister); + if (callFrameSizeInBytes <= 128) { + for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t)) + storePtr(TrustedImmPtr(0), Address(regT0, -8 - offset)); + } else { + Label zeroLoop = label(); + subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0); +#if CPU(ARM64) + storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0); +#else + storePtr(TrustedImmPtr(0), Address(regT0)); + storePtr(TrustedImmPtr(0), Address(regT0, sizeof(intptr_t))); +#endif + branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this); + } + } else +#endif + subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister); + + } } void removeCallFrame() { - unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; - if (callFrameSize) - addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); + unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize); + if (callFrameSizeInBytes) + addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister); + } + + void generateFailReturn() + { + move(TrustedImmPtr((void*)WTF::notFound), returnRegister); + move(TrustedImm32(0), returnRegister2); + generateReturn(); + } + + void generateJITFailReturn() + { + if (m_abortExecution.empty() && 
m_hitMatchLimit.empty()) + return; + + JumpList finishExiting; + if (!m_abortExecution.empty()) { + m_abortExecution.link(this); + move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister); + finishExiting.append(jump()); + } + + if (!m_hitMatchLimit.empty()) { + m_hitMatchLimit.link(this); + move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister); + } + + finishExiting.link(this); + removeCallFrame(); + move(TrustedImm32(0), returnRegister2); + generateReturn(); } - // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. + // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns. void setSubpatternStart(RegisterID reg, unsigned subpattern) { ASSERT(subpattern); @@ -385,6 +705,12 @@ class YarrGenerator : private DefaultMacroAssembler { store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int))); } + void clearMatches(unsigned subpattern, unsigned lastSubpattern) + { + for (; subpattern <= lastSubpattern; subpattern++) + clearSubpatternStart(subpattern); + } + // We use one of three different strategies to track the start of the current match, // while matching. // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily @@ -427,18 +753,21 @@ class YarrGenerator : private DefaultMacroAssembler { OpNestedAlternativeNext, OpNestedAlternativeEnd, // Used for alternatives in subpatterns where there is only a single - // alternative (backtrackingis easier in these cases), or for alternatives + // alternative (backtracking is easier in these cases), or for alternatives // which never need to be backtracked (those in parenthetical assertions, // terminal subpatterns). OpSimpleNestedAlternativeBegin, OpSimpleNestedAlternativeNext, OpSimpleNestedAlternativeEnd, - // Used to wrap 'Once' subpattern matches (quantityCount == 1). + // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1). 
OpParenthesesSubpatternOnceBegin, OpParenthesesSubpatternOnceEnd, // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). OpParenthesesSubpatternTerminalBegin, OpParenthesesSubpatternTerminalEnd, + // Used to wrap generic captured matches + OpParenthesesSubpatternBegin, + OpParenthesesSubpatternEnd, // Used to wrap parenthetical assertions. OpParentheticalAssertionBegin, OpParentheticalAssertionEnd, @@ -468,16 +797,16 @@ class YarrGenerator : private DefaultMacroAssembler { // The operation, as a YarrOpCode, and also a reference to the PatternTerm. YarrOpCode m_op; - PatternTerm* m_term = nullptr; + PatternTerm* m_term; // For alternatives, this holds the PatternAlternative and doubly linked // references to this alternative's siblings. In the case of the // OpBodyAlternativeEnd node at the end of a section of repeating nodes, // m_nextOp will reference the OpBodyAlternativeBegin node of the first // repeating alternative. - PatternAlternative* m_alternative = nullptr; - size_t m_previousOp = 0; - size_t m_nextOp = 0; + PatternAlternative* m_alternative; + size_t m_previousOp; + size_t m_nextOp; // Used to record a set of Jumps out of the generated code, typically // used for jumps out to backtracking code, and a single reentry back @@ -495,9 +824,9 @@ class YarrGenerator : private DefaultMacroAssembler { bool m_isDeadCode; // Currently used in the case of some of the more complex management of - // 'm_checked', to cache the offset used in this alternative, to avoid + // 'm_checkedOffset', to cache the offset used in this alternative, to avoid // recalculating it. - int m_checkAdjust; + Checked<unsigned> m_checkAdjust; // Used by OpNestedAlternativeNext/End to hold the pointer to the // value that will be pushed into the pattern's frame to return to, @@ -599,7 +928,7 @@ class YarrGenerator : private DefaultMacroAssembler { } // Called at the end of code generation to link all return addresses. 
- void linkDataLabels(LinkBuffer<JSC::DefaultMacroAssembler>& linkBuffer) + void linkDataLabels(DefaultLinkBuffer& linkBuffer) { ASSERT(isEmpty()); for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) @@ -642,14 +971,14 @@ class YarrGenerator : private DefaultMacroAssembler { YarrOp& op = m_ops[opIndex]; PatternTerm* term = op.m_term; - if (m_pattern.m_multiline) { + if (m_pattern.multiline()) { const RegisterID character = regT0; JumpList matchDest; if (!term->inputPosition) - matchDest.append(branch32(Equal, index, Imm32(m_checked))); + matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()))); - readCharacter((term->inputPosition - m_checked) - 1, character); + readCharacter(m_checkedOffset - term->inputPosition + 1, character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); op.m_jumps.append(jump()); @@ -659,7 +988,7 @@ class YarrGenerator : private DefaultMacroAssembler { if (term->inputPosition) op.m_jumps.append(jump()); else - op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); + op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet()))); } } void backtrackAssertionBOL(size_t opIndex) @@ -672,20 +1001,20 @@ class YarrGenerator : private DefaultMacroAssembler { YarrOp& op = m_ops[opIndex]; PatternTerm* term = op.m_term; - if (m_pattern.m_multiline) { + if (m_pattern.multiline()) { const RegisterID character = regT0; JumpList matchDest; - if (term->inputPosition == m_checked) + if (term->inputPosition == m_checkedOffset.unsafeGet()) matchDest.append(atEndOfInput()); - readCharacter(term->inputPosition - m_checked, character); + readCharacter(m_checkedOffset - term->inputPosition, character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); op.m_jumps.append(jump()); matchDest.link(this); } else { - if (term->inputPosition == m_checked) + if (term->inputPosition == m_checkedOffset.unsafeGet()) op.m_jumps.append(notAtEndOfInput()); // Erk, really 
should poison out these alternatives early. :-/ else @@ -705,11 +1034,19 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; - if (term->inputPosition == m_checked) + if (term->inputPosition == m_checkedOffset.unsafeGet()) nextIsNotWordChar.append(atEndOfInput()); - readCharacter((term->inputPosition - m_checked), character); - matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); + readCharacter(m_checkedOffset - term->inputPosition, character); + + CharacterClass* wordcharCharacterClass; + + if (m_unicodeIgnoreCase) + wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(); + else + wordcharCharacterClass = m_pattern.wordcharCharacterClass(); + + matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass); } void generateAssertionWordBoundary(size_t opIndex) @@ -722,9 +1059,17 @@ class YarrGenerator : private DefaultMacroAssembler { Jump atBegin; JumpList matchDest; if (!term->inputPosition) - atBegin = branch32(Equal, index, Imm32(m_checked)); - readCharacter((term->inputPosition - m_checked) - 1, character); - matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); + atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())); + readCharacter(m_checkedOffset - term->inputPosition + 1, character); + + CharacterClass* wordcharCharacterClass; + + if (m_unicodeIgnoreCase) + wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(); + else + wordcharCharacterClass = m_pattern.wordcharCharacterClass(); + + matchCharacterClass(character, matchDest, wordcharCharacterClass); if (!term->inputPosition) atBegin.link(this); @@ -775,7 +1120,7 @@ class YarrGenerator : private DefaultMacroAssembler { YarrOp* nextOp = &m_ops[opIndex + 1]; PatternTerm* term = op.m_term; - UChar ch = term->patternCharacter; + UChar32 ch = term->patternCharacter; if ((ch > 0xff) && (m_charSize == Char8)) { // Have a 16 bit pattern character and 
an 8 bit string - short circuit @@ -784,21 +1129,21 @@ class YarrGenerator : private DefaultMacroAssembler { } const RegisterID character = regT0; - int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; + unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; unsigned ignoreCaseMask = 0; #if CPU(BIG_ENDIAN) int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); #else int allCharacters = ch; #endif - int numberCharacters; - int startTermPosition = term->inputPosition; + unsigned numberCharacters; + unsigned startTermPosition = term->inputPosition; // For case-insesitive compares, non-ascii characters that have different // upper & lower case representations are converted to a character class. - ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); + ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode)); - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) + if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) #if CPU(BIG_ENDIAN) ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16); #else @@ -810,8 +1155,9 @@ class YarrGenerator : private DefaultMacroAssembler { if (nextTerm->type != PatternTerm::TypePatternCharacter || nextTerm->quantityType != QuantifierFixedCount - || nextTerm->quantityCount != 1 - || nextTerm->inputPosition != (startTermPosition + numberCharacters)) + || nextTerm->quantityMaxCount != 1 + || nextTerm->inputPosition != (startTermPosition + numberCharacters) + || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs)) break; nextOp->m_isDeadCode = true; @@ -822,7 +1168,7 @@ class YarrGenerator : private DefaultMacroAssembler { int shiftAmount = (m_charSize == Char8 ? 
8 : 16) * numberCharacters; #endif - UChar currentCharacter = nextTerm->patternCharacter; + UChar32 currentCharacter = nextTerm->patternCharacter; if ((currentCharacter > 0xff) && (m_charSize == Char8)) { // Have a 16 bit pattern character and an 8 bit string - short circuit @@ -832,47 +1178,43 @@ class YarrGenerator : private DefaultMacroAssembler { // For case-insesitive compares, non-ascii characters that have different // upper & lower case representations are converted to a character class. - ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter)); + ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode)); allCharacters |= (currentCharacter << shiftAmount); - if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter))) + if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter))) ignoreCaseMask |= 32 << shiftAmount; } if (m_charSize == Char8) { switch (numberCharacters) { case 1: - op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character)); + op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character)); return; case 2: { - BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); - load16Unaligned(address, character); + load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); break; } case 3: { - BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); - load16Unaligned(highAddress, character); + load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); if (ignoreCaseMask) or32(Imm32(ignoreCaseMask), character); op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); - op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, 
character)); + op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character)); return; } case 4: { - BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); - load32WithUnalignedHalfWords(address, character); + load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character); break; } } } else { switch (numberCharacters) { case 1: - op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); + op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character)); return; case 2: - BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); - load32WithUnalignedHalfWords(address, character); + load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character); break; } } @@ -891,32 +1233,33 @@ class YarrGenerator : private DefaultMacroAssembler { { YarrOp& op = m_ops[opIndex]; PatternTerm* term = op.m_term; - UChar ch = term->patternCharacter; + UChar32 ch = term->patternCharacter; const RegisterID character = regT0; const RegisterID countRegister = regT1; move(index, countRegister); - sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); + Checked<unsigned> scaledMaxCount = term->quantityMaxCount; + scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2; + sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister); Label loop(this); - BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? 
sizeof(char) : sizeof(UChar))).unsafeGet()); - - if (m_charSize == Char8) - load8(address, character); - else - load16(address, character); - + readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister); // For case-insesitive compares, non-ascii characters that have different // upper & lower case representations are converted to a character class. - ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { + ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode)); + if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) { or32(TrustedImm32(0x20), character); ch |= 0x20; } op.m_jumps.append(branch32(NotEqual, character, Imm32(ch))); - add32(TrustedImm32(1), countRegister); +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) + add32(TrustedImm32(2), countRegister); + else +#endif + add32(TrustedImm32(1), countRegister); branch32(NotEqual, countRegister, index).linkTo(loop, this); } void backtrackPatternCharacterFixed(size_t opIndex) @@ -928,7 +1271,7 @@ class YarrGenerator : private DefaultMacroAssembler { { YarrOp& op = m_ops[opIndex]; PatternTerm* term = op.m_term; - UChar ch = term->patternCharacter; + UChar32 ch = term->patternCharacter; const RegisterID character = regT0; const RegisterID countRegister = regT1; @@ -940,20 +1283,30 @@ class YarrGenerator : private DefaultMacroAssembler { JumpList failures; Label loop(this); failures.append(atEndOfInput()); - failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); + failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character)); - add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); - if (term->quantityCount == quantifyInfinite) +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) { + Jump surrogatePairOk = notAtEndOfInput(); + 
sub32(TrustedImm32(1), index); + failures.append(jump()); + surrogatePairOk.link(this); + add32(TrustedImm32(1), index); + } +#endif + add32(TrustedImm32(1), countRegister); + + if (term->quantityMaxCount == quantifyInfinite) jump(loop); else - branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); + branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this); failures.link(this); } op.m_reentry = label(); - storeToFrame(countRegister, term->frameLocation); + storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex()); } void backtrackPatternCharacterGreedy(size_t opIndex) { @@ -964,10 +1317,13 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.link(this); - loadFromFrame(term->frameLocation, countRegister); + loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister); m_backtrackingState.append(branchTest32(Zero, countRegister)); sub32(TrustedImm32(1), countRegister); - sub32(TrustedImm32(1), index); + if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter)) + sub32(TrustedImm32(1), index); + else + sub32(TrustedImm32(2), index); jump(op.m_reentry); } @@ -980,36 +1336,50 @@ class YarrGenerator : private DefaultMacroAssembler { move(TrustedImm32(0), countRegister); op.m_reentry = label(); - storeToFrame(countRegister, term->frameLocation); + storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex()); } void backtrackPatternCharacterNonGreedy(size_t opIndex) { YarrOp& op = m_ops[opIndex]; PatternTerm* term = op.m_term; - UChar ch = term->patternCharacter; + UChar32 ch = term->patternCharacter; const RegisterID character = regT0; const RegisterID countRegister = regT1; m_backtrackingState.link(this); - loadFromFrame(term->frameLocation, countRegister); + loadFromFrame(term->frameLocation + 
BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister); // Unless have a 16 bit pattern character and an 8 bit string - short circuit if (!((ch > 0xff) && (m_charSize == Char8))) { JumpList nonGreedyFailures; nonGreedyFailures.append(atEndOfInput()); - if (term->quantityCount != quantifyInfinite) - nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); - nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); + if (term->quantityMaxCount != quantifyInfinite) + nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet()))); + nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character)); - add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) { + Jump surrogatePairOk = notAtEndOfInput(); + sub32(TrustedImm32(1), index); + nonGreedyFailures.append(jump()); + surrogatePairOk.link(this); + add32(TrustedImm32(1), index); + } +#endif + add32(TrustedImm32(1), countRegister); jump(op.m_reentry); nonGreedyFailures.link(this); } + if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) { + // subtract countRegister*2 for non-BMP characters + lshift32(TrustedImm32(1), countRegister); + } + sub32(countRegister, index); m_backtrackingState.fallthrough(); } @@ -1021,19 +1391,43 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; + if (m_decodeSurrogatePairs) + storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex()); + JumpList matchDest; - readCharacter(term->inputPosition - m_checked, character); - matchCharacterClass(character, matchDest, term->characterClass); + readCharacter(m_checkedOffset - term->inputPosition, character); + // If we are matching the "any character" builtin class we only need to read the + // character and don't need to match as it will 
always succeed. + if (term->invert() || !term->characterClass->m_anyCharacter) { + matchCharacterClass(character, matchDest, term->characterClass); - if (term->invert()) - op.m_jumps.append(matchDest); - else { - op.m_jumps.append(jump()); - matchDest.link(this); + if (term->invert()) + op.m_jumps.append(matchDest); + else { + op.m_jumps.append(jump()); + matchDest.link(this); + } } +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase); + add32(TrustedImm32(1), index); + isBMPChar.link(this); + } +#endif } void backtrackCharacterClassOnce(size_t opIndex) { +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + m_backtrackingState.link(this); + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index); + m_backtrackingState.fallthrough(); + } +#endif backtrackTermDefault(opIndex); } @@ -1046,24 +1440,34 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID countRegister = regT1; move(index, countRegister); - sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); + sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister); Label loop(this); JumpList matchDest; - if (m_charSize == Char8) - load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character); - else - load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character); - matchCharacterClass(character, matchDest, term->characterClass); + readCharacter(m_checkedOffset - term->inputPosition - term->quantityMaxCount, character, countRegister); + // If we are matching the "any character" builtin class we only need to read the + // character 
and don't need to match as it will always succeed. + if (term->invert() || !term->characterClass->m_anyCharacter) { + matchCharacterClass(character, matchDest, term->characterClass); - if (term->invert()) - op.m_jumps.append(matchDest); - else { - op.m_jumps.append(jump()); - matchDest.link(this); + if (term->invert()) + op.m_jumps.append(matchDest); + else { + op.m_jumps.append(jump()); + matchDest.link(this); + } } add32(TrustedImm32(1), countRegister); +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase); + op.m_jumps.append(atEndOfInput()); + add32(TrustedImm32(1), countRegister); + add32(TrustedImm32(1), index); + isBMPChar.link(this); + } +#endif branch32(NotEqual, countRegister, index).linkTo(loop, this); } void backtrackCharacterClassFixed(size_t opIndex) @@ -1079,6 +1483,8 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; const RegisterID countRegister = regT1; + if (m_decodeSurrogatePairs) + storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex()); move(TrustedImm32(0), countRegister); JumpList failures; @@ -1086,20 +1492,33 @@ class YarrGenerator : private DefaultMacroAssembler { failures.append(atEndOfInput()); if (term->invert()) { - readCharacter(term->inputPosition - m_checked, character); + readCharacter(m_checkedOffset - term->inputPosition, character); matchCharacterClass(character, failures, term->characterClass); } else { JumpList matchDest; - readCharacter(term->inputPosition - m_checked, character); - matchCharacterClass(character, matchDest, term->characterClass); - failures.append(jump()); + readCharacter(m_checkedOffset - term->inputPosition, character); + // If we are matching the "any character" builtin class we only need to read the + // character and don't need to match as it will always succeed. 
+ if (!term->characterClass->m_anyCharacter) { + matchCharacterClass(character, matchDest, term->characterClass); + failures.append(jump()); + } matchDest.link(this); } - add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); - if (term->quantityCount != quantifyInfinite) { - branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + failures.append(atEndOfInput()); + Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase); + add32(TrustedImm32(1), index); + isBMPChar.link(this); + } +#endif + add32(TrustedImm32(1), countRegister); + + if (term->quantityMaxCount != quantifyInfinite) { + branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this); failures.append(jump()); } else jump(loop); @@ -1107,7 +1526,7 @@ class YarrGenerator : private DefaultMacroAssembler { failures.link(this); op.m_reentry = label(); - storeToFrame(countRegister, term->frameLocation); + storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex()); } void backtrackCharacterClassGreedy(size_t opIndex) { @@ -1118,10 +1537,34 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.link(this); - loadFromFrame(term->frameLocation, countRegister); + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister); m_backtrackingState.append(branchTest32(Zero, countRegister)); sub32(TrustedImm32(1), countRegister); - sub32(TrustedImm32(1), index); + if (!m_decodeSurrogatePairs) + sub32(TrustedImm32(1), index); + else { + const RegisterID character = regT0; + + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index); + // Rematch one less + storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex()); + + Label rematchLoop(this); + readCharacter(m_checkedOffset - 
term->inputPosition, character); + + sub32(TrustedImm32(1), countRegister); + add32(TrustedImm32(1), index); + +#ifdef JIT_UNICODE_EXPRESSIONS + Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase); + add32(TrustedImm32(1), index); + isBMPChar.link(this); +#endif + + branchTest32(Zero, countRegister).linkTo(rematchLoop, this); + + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister); + } jump(op.m_reentry); } @@ -1134,8 +1577,11 @@ class YarrGenerator : private DefaultMacroAssembler { move(TrustedImm32(0), countRegister); op.m_reentry = label(); - storeToFrame(countRegister, term->frameLocation); + if (m_decodeSurrogatePairs) + storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex()); + storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex()); } + void backtrackCharacterClassNonGreedy(size_t opIndex) { YarrOp& op = m_ops[opIndex]; @@ -1148,24 +1594,38 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.link(this); - loadFromFrame(term->frameLocation, countRegister); + if (m_decodeSurrogatePairs) + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index); + loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister); nonGreedyFailures.append(atEndOfInput()); - nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); + nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet()))); JumpList matchDest; - readCharacter(term->inputPosition - m_checked, character); - matchCharacterClass(character, matchDest, term->characterClass); + readCharacter(m_checkedOffset - term->inputPosition, character); + // If we are matching the "any character" builtin class we only need to read the + // character and don't need to match as it will always succeed. 
+ if (term->invert() || !term->characterClass->m_anyCharacter) { + matchCharacterClass(character, matchDest, term->characterClass); - if (term->invert()) - nonGreedyFailures.append(matchDest); - else { - nonGreedyFailures.append(jump()); - matchDest.link(this); + if (term->invert()) + nonGreedyFailures.append(matchDest); + else { + nonGreedyFailures.append(jump()); + matchDest.link(this); + } } - add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + nonGreedyFailures.append(atEndOfInput()); + Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase); + add32(TrustedImm32(1), index); + isBMPChar.link(this); + } +#endif + add32(TrustedImm32(1), countRegister); jump(op.m_reentry); @@ -1181,15 +1641,28 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; const RegisterID matchPos = regT1; +#ifndef HAVE_INITIAL_START_REG + const RegisterID initialStart = character; +#endif JumpList foundBeginningNewLine; JumpList saveStartIndex; JumpList foundEndingNewLine; + if (m_pattern.dotAll()) { + move(TrustedImm32(0), matchPos); + setMatchStart(matchPos); + move(length, index); + return; + } + ASSERT(!m_pattern.m_body->m_hasFixedSize); getMatchStart(matchPos); - saveStartIndex.append(branchTest32(Zero, matchPos)); +#ifndef HAVE_INITIAL_START_REG + loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart); +#endif + saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart)); Label findBOLLoop(this); sub32(TrustedImm32(1), matchPos); if (m_charSize == Char8) @@ -1197,14 +1670,18 @@ class YarrGenerator : private DefaultMacroAssembler { else load16(BaseIndex(input, matchPos, TimesTwo, 0), character); matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); - branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); + +#ifndef HAVE_INITIAL_START_REG + 
loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart); +#endif + branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this); saveStartIndex.append(jump()); foundBeginningNewLine.link(this); add32(TrustedImm32(1), matchPos); // Advance past newline saveStartIndex.link(this); - if (!m_pattern.m_multiline && term->anchors.bolAnchor) + if (!m_pattern.multiline() && term->anchors.bolAnchor) op.m_jumps.append(branchTest32(NonZero, matchPos)); ASSERT(!m_pattern.m_body->m_hasFixedSize); @@ -1224,7 +1701,7 @@ class YarrGenerator : private DefaultMacroAssembler { foundEndingNewLine.link(this); - if (!m_pattern.m_multiline && term->anchors.eolAnchor) + if (!m_pattern.multiline() && term->anchors.eolAnchor) op.m_jumps.append(branch32(NotEqual, matchPos, length)); move(matchPos, index); @@ -1247,7 +1724,7 @@ class YarrGenerator : private DefaultMacroAssembler { case PatternTerm::TypePatternCharacter: switch (term->quantityType) { case QuantifierFixedCount: - if (term->quantityCount == 1) + if (term->quantityMaxCount == 1) generatePatternCharacterOnce(opIndex); else generatePatternCharacterFixed(opIndex); @@ -1264,7 +1741,7 @@ class YarrGenerator : private DefaultMacroAssembler { case PatternTerm::TypeCharacterClass: switch (term->quantityType) { case QuantifierFixedCount: - if (term->quantityCount == 1) + if (term->quantityMaxCount == 1) generateCharacterClassOnce(opIndex); else generateCharacterClassFixed(opIndex); @@ -1297,7 +1774,7 @@ class YarrGenerator : private DefaultMacroAssembler { case PatternTerm::TypeParentheticalAssertion: RELEASE_ASSERT_NOT_REACHED(); case PatternTerm::TypeBackReference: - m_shouldFallBack = true; + m_failureReason = JITFailureReason::BackReference; break; case PatternTerm::TypeDotStarEnclosure: generateDotStarEnclosure(opIndex); @@ -1313,7 +1790,7 @@ class YarrGenerator : private DefaultMacroAssembler { case PatternTerm::TypePatternCharacter: switch (term->quantityType) { case QuantifierFixedCount: - if 
(term->quantityCount == 1) + if (term->quantityMaxCount == 1) backtrackPatternCharacterOnce(opIndex); else backtrackPatternCharacterFixed(opIndex); @@ -1330,7 +1807,7 @@ class YarrGenerator : private DefaultMacroAssembler { case PatternTerm::TypeCharacterClass: switch (term->quantityType) { case QuantifierFixedCount: - if (term->quantityCount == 1) + if (term->quantityMaxCount == 1) backtrackCharacterClassOnce(opIndex); else backtrackCharacterClassFixed(opIndex); @@ -1368,7 +1845,7 @@ class YarrGenerator : private DefaultMacroAssembler { break; case PatternTerm::TypeBackReference: - m_shouldFallBack = true; + m_failureReason = JITFailureReason::BackReference; break; } } @@ -1419,7 +1896,7 @@ class YarrGenerator : private DefaultMacroAssembler { // set as appropriate to this alternative. op.m_reentry = label(); - m_checked += alternative->m_minimumSize; + m_checkedOffset += alternative->m_minimumSize; break; } case OpBodyAlternativeNext: @@ -1472,8 +1949,8 @@ class YarrGenerator : private DefaultMacroAssembler { } if (op.m_op == OpBodyAlternativeNext) - m_checked += alternative->m_minimumSize; - m_checked -= priorAlternative->m_minimumSize; + m_checkedOffset += alternative->m_minimumSize; + m_checkedOffset -= priorAlternative->m_minimumSize; break; } @@ -1500,13 +1977,13 @@ class YarrGenerator : private DefaultMacroAssembler { PatternDisjunction* disjunction = term->parentheses.disjunction; // Calculate how much input we need to check for, and if non-zero check. 
- op.m_checkAdjust = alternative->m_minimumSize; + op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize); if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) op.m_checkAdjust -= disjunction->m_minimumSize; if (op.m_checkAdjust) - op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); + op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet())); - m_checked += op.m_checkAdjust; + m_checkedOffset += op.m_checkAdjust; break; } case OpSimpleNestedAlternativeNext: @@ -1518,10 +1995,7 @@ class YarrGenerator : private DefaultMacroAssembler { // In the non-simple case, store a 'return address' so we can backtrack correctly. if (op.m_op == OpNestedAlternativeNext) { unsigned parenthesesFrameLocation = term->frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation; - if (term->quantityType != QuantifierFixedCount) - alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; - op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); + op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex()); } if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { @@ -1554,11 +2028,11 @@ class YarrGenerator : private DefaultMacroAssembler { if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) op.m_checkAdjust -= disjunction->m_minimumSize; if (op.m_checkAdjust) - op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); + op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet())); YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked -= lastOp.m_checkAdjust; - m_checked += op.m_checkAdjust; + m_checkedOffset -= lastOp.m_checkAdjust; + m_checkedOffset += op.m_checkAdjust; break; } case OpSimpleNestedAlternativeEnd: @@ -1568,10 +2042,7 @@ class YarrGenerator : private 
DefaultMacroAssembler { // In the non-simple case, store a 'return address' so we can backtrack correctly. if (op.m_op == OpNestedAlternativeEnd) { unsigned parenthesesFrameLocation = term->frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation; - if (term->quantityType != QuantifierFixedCount) - alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; - op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); + op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex()); } if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { @@ -1587,7 +2058,7 @@ class YarrGenerator : private DefaultMacroAssembler { op.m_jumps.clear(); YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked -= lastOp.m_checkAdjust; + m_checkedOffset -= lastOp.m_checkAdjust; break; } @@ -1599,7 +2070,7 @@ class YarrGenerator : private DefaultMacroAssembler { PatternTerm* term = op.m_term; unsigned parenthesesFrameLocation = term->frameLocation; const RegisterID indexTemporary = regT0; - ASSERT(term->quantityCount == 1); + ASSERT(term->quantityMaxCount == 1); // Upon entry to a Greedy quantified set of parenthese store the index. // We'll use this for two purposes: @@ -1616,12 +2087,12 @@ class YarrGenerator : private DefaultMacroAssembler { // // FIXME: for capturing parens, could use the index in the capture array? 
if (term->quantityType == QuantifierGreedy) - storeToFrame(index, parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()); else if (term->quantityType == QuantifierNonGreedy) { - storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()); op.m_jumps.append(jump()); op.m_reentry = label(); - storeToFrame(index, parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()); } // If the parenthese are capturing, store the starting index value to the @@ -1631,12 +2102,12 @@ class YarrGenerator : private DefaultMacroAssembler { // offsets only afterwards, at the point the results array is // being accessed. if (term->capture() && compileMode == IncludeSubpatterns) { - int inputOffset = term->inputPosition - m_checked; + unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet(); if (term->quantityType == QuantifierFixedCount) - inputOffset -= term->parentheses.disjunction->m_minimumSize; + inputOffset += term->parentheses.disjunction->m_minimumSize; if (inputOffset) { move(index, indexTemporary); - add32(Imm32(inputOffset), indexTemporary); + sub32(Imm32(inputOffset), indexTemporary); setSubpatternStart(indexTemporary, term->parentheses.subpatternId); } else setSubpatternStart(index, term->parentheses.subpatternId); @@ -1646,18 +2117,16 @@ class YarrGenerator : private DefaultMacroAssembler { case OpParenthesesSubpatternOnceEnd: { PatternTerm* term = op.m_term; const RegisterID indexTemporary = regT0; - ASSERT(term->quantityCount == 1); + ASSERT(term->quantityMaxCount == 1); -#ifndef NDEBUG // Runtime ASSERT to make sure that the nested alternative handled the // "no input consumed" check. 
- if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { + if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { Jump pastBreakpoint; pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - breakpoint(); + // ### abortWithReason(YARRNoInputConsumed); pastBreakpoint.link(this); } -#endif // If the parenthese are capturing, store the ending index value to the // captures array, offsetting as necessary. @@ -1666,10 +2135,10 @@ class YarrGenerator : private DefaultMacroAssembler { // offsets only afterwards, at the point the results array is // being accessed. if (term->capture() && compileMode == IncludeSubpatterns) { - int inputOffset = term->inputPosition - m_checked; + unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet(); if (inputOffset) { move(index, indexTemporary); - add32(Imm32(inputOffset), indexTemporary); + sub32(Imm32(inputOffset), indexTemporary); setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); } else setSubpatternEnd(index, term->parentheses.subpatternId); @@ -1691,7 +2160,7 @@ class YarrGenerator : private DefaultMacroAssembler { case OpParenthesesSubpatternTerminalBegin: { PatternTerm* term = op.m_term; ASSERT(term->quantityType == QuantifierGreedy); - ASSERT(term->quantityCount == quantifyInfinite); + ASSERT(term->quantityMaxCount == quantifyInfinite); ASSERT(!term->capture()); // Upon entry set a label to loop back to. @@ -1699,23 +2168,23 @@ class YarrGenerator : private DefaultMacroAssembler { // Store the start index of the current match; we need to reject zero // length matches. 
- storeToFrame(index, term->frameLocation); + storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex()); break; } case OpParenthesesSubpatternTerminalEnd: { YarrOp& beginOp = m_ops[op.m_previousOp]; -#ifndef NDEBUG - PatternTerm* term = op.m_term; - - // Runtime ASSERT to make sure that the nested alternative handled the - // "no input consumed" check. - Jump pastBreakpoint; - pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - breakpoint(); - pastBreakpoint.link(this); -#endif + if (!ASSERT_DISABLED) { + PatternTerm* term = op.m_term; + + // Runtime ASSERT to make sure that the nested alternative handled the + // "no input consumed" check. + Jump pastBreakpoint; + pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); + // ### abortWithReason(YARRNoInputConsumed); + pastBreakpoint.link(this); + } - // We know that the match is non-zero, we can accept it and + // We know that the match is non-zero, we can accept it and // loop back up to the head of the subpattern. jump(beginOp.m_reentry); @@ -1725,6 +2194,131 @@ class YarrGenerator : private DefaultMacroAssembler { break; } + // OpParenthesesSubpatternBegin/End + // + // These nodes support generic subpatterns. + case OpParenthesesSubpatternBegin: { +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + PatternTerm* term = op.m_term; + unsigned parenthesesFrameLocation = term->frameLocation; + + // Upon entry to a Greedy quantified set of parenthese store the index. + // We'll use this for two purposes: + // - To indicate which iteration we are on of mathing the remainder of + // the expression after the parentheses - the first, including the + // match within the parentheses, or the second having skipped over them. + // - To check for empty matches, which must be rejected. 
+ // + // At the head of a NonGreedy set of parentheses we'll immediately set the + // value on the stack to -1 (indicating a match skipping the subpattern), + // and plant a jump to the end. We'll also plant a label to backtrack to + // to reenter the subpattern later, with a store to set up index on the + // second iteration. + // + // FIXME: for capturing parens, could use the index in the capture array? + if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) { + storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); + storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex()); + + if (term->quantityType == QuantifierNonGreedy) { + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()); + op.m_jumps.append(jump()); + } + + op.m_reentry = label(); + RegisterID currParenContextReg = regT0; + RegisterID newParenContextReg = regT1; + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg); + allocateParenContext(newParenContextReg); + storePtr(currParenContextReg, newParenContextReg); + storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex()); + saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()); + } + + // If the parenthese are capturing, store the starting index value to the + // captures array, offsetting as necessary. + // + // FIXME: could avoid offsetting this value in JIT code, apply + // offsets only afterwards, at the point the results array is + // being accessed. 
+ if (term->capture() && compileMode == IncludeSubpatterns) { + const RegisterID indexTemporary = regT0; + unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet(); + if (term->quantityType == QuantifierFixedCount) + inputOffset += term->parentheses.disjunction->m_minimumSize; + if (inputOffset) { + move(index, indexTemporary); + sub32(Imm32(inputOffset), indexTemporary); + setSubpatternStart(indexTemporary, term->parentheses.subpatternId); + } else + setSubpatternStart(index, term->parentheses.subpatternId); + } +#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS + RELEASE_ASSERT_NOT_REACHED(); +#endif + break; + } + case OpParenthesesSubpatternEnd: { +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + PatternTerm* term = op.m_term; + unsigned parenthesesFrameLocation = term->frameLocation; + + // Runtime ASSERT to make sure that the nested alternative handled the + // "no input consumed" check. + if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { + Jump pastBreakpoint; + pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))); + // ### abortWithReason(YARRNoInputConsumed); + pastBreakpoint.link(this); + } + + const RegisterID countTemporary = regT1; + + YarrOp& beginOp = m_ops[op.m_previousOp]; + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary); + add32(TrustedImm32(1), countTemporary); + storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); + + // If the parenthese are capturing, store the ending index value to the + // captures array, offsetting as necessary. + // + // FIXME: could avoid offsetting this value in JIT code, apply + // offsets only afterwards, at the point the results array is + // being accessed. 
+ if (term->capture() && compileMode == IncludeSubpatterns) { + const RegisterID indexTemporary = regT0; + + unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet(); + if (inputOffset) { + move(index, indexTemporary); + sub32(Imm32(inputOffset), indexTemporary); + setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); + } else + setSubpatternEnd(index, term->parentheses.subpatternId); + } + + // If the parentheses are quantified Greedy then add a label to jump back + // to if get a failed match from after the parentheses. For NonGreedy + // parentheses, link the jump from before the subpattern to here. + if (term->quantityType == QuantifierGreedy) { + if (term->quantityMaxCount != quantifyInfinite) + branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this); + else + jump(beginOp.m_reentry); + + op.m_reentry = label(); + } else if (term->quantityType == QuantifierNonGreedy) { + YarrOp& beginOp = m_ops[op.m_previousOp]; + beginOp.m_jumps.link(this); + } +#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS + RELEASE_ASSERT_NOT_REACHED(); +#endif + break; + } + // OpParentheticalAssertionBegin/End case OpParentheticalAssertionBegin: { PatternTerm* term = op.m_term; @@ -1732,14 +2326,14 @@ class YarrGenerator : private DefaultMacroAssembler { // Store the current index - assertions should not update index, so // we will need to restore it upon a successful match. 
unsigned parenthesesFrameLocation = term->frameLocation; - storeToFrame(index, parenthesesFrameLocation); + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex()); // Check - op.m_checkAdjust = m_checked - term->inputPosition; + op.m_checkAdjust = m_checkedOffset - term->inputPosition; if (op.m_checkAdjust) - sub32(Imm32(op.m_checkAdjust), index); + sub32(Imm32(op.m_checkAdjust.unsafeGet()), index); - m_checked -= op.m_checkAdjust; + m_checkedOffset -= op.m_checkAdjust; break; } case OpParentheticalAssertionEnd: { @@ -1747,7 +2341,7 @@ class YarrGenerator : private DefaultMacroAssembler { // Restore the input index value. unsigned parenthesesFrameLocation = term->frameLocation; - loadFromFrame(parenthesesFrameLocation, index); + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index); // If inverted, a successful match of the assertion must be treated // as a failure, so jump to backtracking. @@ -1757,15 +2351,13 @@ class YarrGenerator : private DefaultMacroAssembler { } YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked += lastOp.m_checkAdjust; + m_checkedOffset += lastOp.m_checkAdjust; break; } case OpMatchFailed: removeCallFrame(); - move(TrustedImmPtr((void*)WTF::notFound), returnRegister); - move(TrustedImm32(0), returnRegister2); - generateReturn(); + generateFailReturn(); break; } @@ -1817,9 +2409,9 @@ class YarrGenerator : private DefaultMacroAssembler { if (op.m_op == OpBodyAlternativeNext) { PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; - m_checked += priorAlternative->m_minimumSize; + m_checkedOffset += priorAlternative->m_minimumSize; } - m_checked -= alternative->m_minimumSize; + m_checkedOffset -= alternative->m_minimumSize; // Is this the last alternative? If not, then if we backtrack to this point we just // need to jump to try to match the next alternative. 
@@ -1836,6 +2428,8 @@ class YarrGenerator : private DefaultMacroAssembler { } bool onceThrough = endOp.m_nextOp == notFound; + + JumpList lastStickyAlternativeFailures; // First, generate code to handle cases where we backtrack out of an attempted match // of the last alternative. If this is a 'once through' set of alternatives then we @@ -1851,43 +2445,49 @@ class YarrGenerator : private DefaultMacroAssembler { && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1)) m_backtrackingState.linkTo(beginOp->m_reentry, this); - else { + else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) { + // It is a sticky pattern and the last alternative failed, jump to the end. + m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this); + } else { // We need to generate a trampoline of code to execute before looping back // around to the first alternative. m_backtrackingState.link(this); - // If the pattern size is not fixed, then store the start index, for use if we match. - if (!m_pattern.m_body->m_hasFixedSize) { - if (alternative->m_minimumSize == 1) - setMatchStart(index); - else { - move(index, regT0); - if (alternative->m_minimumSize) - sub32(Imm32(alternative->m_minimumSize - 1), regT0); - else - add32(TrustedImm32(1), regT0); - setMatchStart(regT0); + // No need to advance and retry for a sticky pattern. + if (!m_pattern.sticky()) { + // If the pattern size is not fixed, then store the start index for use if we match. + if (!m_pattern.m_body->m_hasFixedSize) { + if (alternative->m_minimumSize == 1) + setMatchStart(index); + else { + move(index, regT0); + if (alternative->m_minimumSize) + sub32(Imm32(alternative->m_minimumSize - 1), regT0); + else + add32(TrustedImm32(1), regT0); + setMatchStart(regT0); + } } - } - // Generate code to loop. Check whether the last alternative is longer than the - // first (e.g. /a|xy/ or /a|xyz/). 
- if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { - // We want to loop, and increment input position. If the delta is 1, it is - // already correctly incremented, if more than one then decrement as appropriate. - unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; - ASSERT(delta); - if (delta != 1) - sub32(Imm32(delta - 1), index); - jump(beginOp->m_reentry); - } else { - // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot - // be sufficent input available to handle this, so just fall through. - unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; - if (delta != 0xFFFFFFFFu) { - // We need to check input because we are incrementing the input. - add32(Imm32(delta + 1), index); - checkInput().linkTo(beginOp->m_reentry, this); + // Generate code to loop. Check whether the last alternative is longer than the + // first (e.g. /a|xy/ or /a|xyz/). + if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { + // We want to loop, and increment input position. If the delta is 1, it is + // already correctly incremented, if more than one then decrement as appropriate. + unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; + ASSERT(delta); + if (delta != 1) + sub32(Imm32(delta - 1), index); + jump(beginOp->m_reentry); + } else { + // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot + // be sufficent input available to handle this, so just fall through. + unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; + if (delta != 0xFFFFFFFFu) { + // We need to check input because we are incrementing the input. 
+ add32(Imm32(delta + 1), index); + checkInput().linkTo(beginOp->m_reentry, this); + } } } } @@ -1896,7 +2496,7 @@ class YarrGenerator : private DefaultMacroAssembler { // We can reach this point in the code in two ways: // - Fallthrough from the code above (a repeating alternative backtracked out of its // last alternative, and did not have sufficent input to run the first). - // - We will loop back up to the following label when a releating alternative loops, + // - We will loop back up to the following label when a repeating alternative loops, // following a failed input check. // // Either way, we have just failed the input check for the first alternative. @@ -1956,56 +2556,57 @@ class YarrGenerator : private DefaultMacroAssembler { needsToUpdateMatchStart = false; } - // Check whether there is sufficient input to loop. Increment the input position by - // one, and check. Also add in the minimum disjunction size before checking - there - // is no point in looping if we're just going to fail all the input checks around - // the next iteration. - ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); - if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { - // If the last alternative had the same minimum size as the disjunction, - // just simply increment input pos by 1, no adjustment based on minimum size. - add32(TrustedImm32(1), index); - } else { - // If the minumum for the last alternative was one greater than than that - // for the disjunction, we're already progressed by 1, nothing to do! - unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; - if (delta) - sub32(Imm32(delta), index); - } - Jump matchFailed = jumpIfNoAvailableInput(); + if (!m_pattern.sticky()) { + // Check whether there is sufficient input to loop. Increment the input position by + // one, and check. 
Also add in the minimum disjunction size before checking - there + // is no point in looping if we're just going to fail all the input checks around + // the next iteration. + ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); + if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { + // If the last alternative had the same minimum size as the disjunction, + // just simply increment input pos by 1, no adjustment based on minimum size. + add32(TrustedImm32(1), index); + } else { + // If the minumum for the last alternative was one greater than than that + // for the disjunction, we're already progressed by 1, nothing to do! + unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; + if (delta) + sub32(Imm32(delta), index); + } + Jump matchFailed = jumpIfNoAvailableInput(); + + if (needsToUpdateMatchStart) { + if (!m_pattern.m_body->m_minimumSize) + setMatchStart(index); + else { + move(index, regT0); + sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); + setMatchStart(regT0); + } + } - if (needsToUpdateMatchStart) { - if (!m_pattern.m_body->m_minimumSize) - setMatchStart(index); + // Calculate how much more input the first alternative requires than the minimum + // for the body as a whole. If no more is needed then we dont need an additional + // input check here - jump straight back up to the start of the first alternative. 
+ if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) + jump(beginOp->m_reentry); else { - move(index, regT0); - sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); - setMatchStart(regT0); + if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) + add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); + else + sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); + checkInput().linkTo(beginOp->m_reentry, this); + jump(firstInputCheckFailed); } - } - // Calculate how much more input the first alternative requires than the minimum - // for the body as a whole. If no more is needed then we dont need an additional - // input check here - jump straight back up to the start of the first alternative. - if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) - jump(beginOp->m_reentry); - else { - if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) - add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); - else - sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); - checkInput().linkTo(beginOp->m_reentry, this); - jump(firstInputCheckFailed); + // We jump to here if we iterate to the point that there is insufficient input to + // run any matches, and need to return a failure state from JIT code. + matchFailed.link(this); } - // We jump to here if we iterate to the point that there is insufficient input to - // run any matches, and need to return a failure state from JIT code. 
- matchFailed.link(this); - + lastStickyAlternativeFailures.link(this); removeCallFrame(); - move(TrustedImmPtr((void*)WTF::notFound), returnRegister); - move(TrustedImm32(0), returnRegister2); - generateReturn(); + generateFailReturn(); break; } case OpBodyAlternativeEnd: { @@ -2013,7 +2614,7 @@ class YarrGenerator : private DefaultMacroAssembler { ASSERT(m_backtrackingState.isEmpty()); PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; - m_checked += priorAlternative->m_minimumSize; + m_checkedOffset += priorAlternative->m_minimumSize; break; } @@ -2064,7 +2665,7 @@ class YarrGenerator : private DefaultMacroAssembler { if (op.m_checkAdjust) { // Handle the cases where we need to link the backtracks here. m_backtrackingState.link(this); - sub32(Imm32(op.m_checkAdjust), index); + sub32(Imm32(op.m_checkAdjust.unsafeGet()), index); if (!isLastAlternative) { // An alternative that is not the last should jump to its successor. jump(nextOp.m_reentry); @@ -2114,9 +2715,9 @@ class YarrGenerator : private DefaultMacroAssembler { if (!isBegin) { YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked += lastOp.m_checkAdjust; + m_checkedOffset += lastOp.m_checkAdjust; } - m_checked -= op.m_checkAdjust; + m_checkedOffset -= op.m_checkAdjust; break; } case OpSimpleNestedAlternativeEnd: @@ -2136,10 +2737,7 @@ class YarrGenerator : private DefaultMacroAssembler { // Plant a jump to the return address. unsigned parenthesesFrameLocation = term->frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation; - if (term->quantityType != QuantifierFixedCount) - alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; - loadFromFrameAndJump(alternativeFrameLocation); + loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex()); // Link the DataLabelPtr associated with the end of the last // alternative to this point. 
@@ -2147,7 +2745,7 @@ class YarrGenerator : private DefaultMacroAssembler { } YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked += lastOp.m_checkAdjust; + m_checkedOffset += lastOp.m_checkAdjust; break; } @@ -2168,9 +2766,9 @@ class YarrGenerator : private DefaultMacroAssembler { // matching start, depending of whether the match is Greedy or NonGreedy. case OpParenthesesSubpatternOnceBegin: { PatternTerm* term = op.m_term; - ASSERT(term->quantityCount == 1); + ASSERT(term->quantityMaxCount == 1); - // We only need to backtrack to thispoint if capturing or greedy. + // We only need to backtrack to this point if capturing or greedy. if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { m_backtrackingState.link(this); @@ -2182,7 +2780,7 @@ class YarrGenerator : private DefaultMacroAssembler { if (term->quantityType == QuantifierGreedy) { // Clear the flag in the stackframe indicating we ran through the subpattern. unsigned parenthesesFrameLocation = term->frameLocation; - storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()); // Jump to after the parentheses, skipping the subpattern. jump(m_ops[op.m_nextOp].m_reentry); // A backtrack from after the parentheses, when skipping the subpattern, @@ -2204,7 +2802,7 @@ class YarrGenerator : private DefaultMacroAssembler { // are currently in a state where we had skipped over the subpattern // (in which case the flag value on the stack will be -1). 
unsigned parenthesesFrameLocation = term->frameLocation; - Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); + Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1)); if (term->quantityType == QuantifierGreedy) { // For Greedy parentheses, we skip after having already tried going @@ -2248,6 +2846,108 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.append(op.m_jumps); break; + // OpParenthesesSubpatternBegin/End + // + // When we are backtracking back out of a capturing subpattern we need + // to clear the start index in the matches output array, to record that + // this subpattern has not been captured. + // + // When backtracking back out of a Greedy quantified subpattern we need + // to catch this, and try running the remainder of the alternative after + // the subpattern again, skipping the parentheses. + // + // Upon backtracking back into a quantified set of parentheses we need to + // check whether we were currently skipping the subpattern. If not, we + // can backtrack into them, if we were we need to either backtrack back + // out of the start of the parentheses, or jump back to the forwards + // matching start, depending of whether the match is Greedy or NonGreedy. 
+ case OpParenthesesSubpatternBegin: { +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + PatternTerm* term = op.m_term; + unsigned parenthesesFrameLocation = term->frameLocation; + + if (term->quantityType != QuantifierFixedCount) { + m_backtrackingState.link(this); + + if (term->quantityType == QuantifierGreedy) { + RegisterID currParenContextReg = regT0; + RegisterID newParenContextReg = regT1; + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg); + + restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation); + + freeParenContext(currParenContextReg, newParenContextReg); + storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex()); + const RegisterID countTemporary = regT0; + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary); + Jump zeroLengthMatch = branchTest32(Zero, countTemporary); + + sub32(TrustedImm32(1), countTemporary); + storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); + + jump(m_ops[op.m_nextOp].m_reentry); + + zeroLengthMatch.link(this); + + // Clear the flag in the stackframe indicating we didn't run through the subpattern. + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()); + + jump(m_ops[op.m_nextOp].m_reentry); + } + + // If Greedy, jump to the end. + if (term->quantityType == QuantifierGreedy) { + // A backtrack from after the parentheses, when skipping the subpattern, + // will jump back to here. 
+ op.m_jumps.link(this); + } + + m_backtrackingState.fallthrough(); + } +#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS + RELEASE_ASSERT_NOT_REACHED(); +#endif + break; + } + case OpParenthesesSubpatternEnd: { +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + PatternTerm* term = op.m_term; + + if (term->quantityType != QuantifierFixedCount) { + m_backtrackingState.link(this); + + // Check whether we should backtrack back into the parentheses, or if we + // are currently in a state where we had skipped over the subpattern + // (in which case the flag value on the stack will be -1). + unsigned parenthesesFrameLocation = term->frameLocation; + Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1)); + + if (term->quantityType == QuantifierGreedy) { + // For Greedy parentheses, we skip after having already tried going + // through the subpattern, so if we get here we're done. + YarrOp& beginOp = m_ops[op.m_previousOp]; + beginOp.m_jumps.append(hadSkipped); + } else { + // For NonGreedy parentheses, we try skipping the subpattern first, + // so if we get here we need to try running through the subpattern + // next. Jump back to the start of the parentheses in the forwards + // matching path. 
+ ASSERT(term->quantityType == QuantifierNonGreedy); + YarrOp& beginOp = m_ops[op.m_previousOp]; + hadSkipped.linkTo(beginOp.m_reentry, this); + } + + m_backtrackingState.fallthrough(); + } + + m_backtrackingState.append(op.m_jumps); +#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS + RELEASE_ASSERT_NOT_REACHED(); +#endif + break; + } + // OpParentheticalAssertionBegin/End case OpParentheticalAssertionBegin: { PatternTerm* term = op.m_term; @@ -2260,7 +2960,7 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.link(this); if (op.m_checkAdjust) - add32(Imm32(op.m_checkAdjust), index); + add32(Imm32(op.m_checkAdjust.unsafeGet()), index); // In an inverted assertion failure to match the subpattern // is treated as a successful match - jump to the end of the @@ -2277,7 +2977,7 @@ class YarrGenerator : private DefaultMacroAssembler { // added the failure caused by a successful match to this. m_backtrackingState.append(endOp.m_jumps); - m_checked += op.m_checkAdjust; + m_checkedOffset += op.m_checkAdjust; break; } case OpParentheticalAssertionEnd: { @@ -2289,7 +2989,7 @@ class YarrGenerator : private DefaultMacroAssembler { m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); YarrOp& lastOp = m_ops[op.m_previousOp]; - m_checked -= lastOp.m_checkAdjust; + m_checkedOffset -= lastOp.m_checkAdjust; break; } @@ -2307,9 +3007,9 @@ class YarrGenerator : private DefaultMacroAssembler { // Emits ops for a subpattern (set of parentheses). These consist // of a set of alternatives wrapped in an outer set of nodes for // the parentheses. - // Supported types of parentheses are 'Once' (quantityCount == 1) - // and 'Terminal' (non-capturing parentheses quantified as greedy - // and infinite). + // Supported types of parentheses are 'Once' (quantityMaxCount == 1), + // 'Terminal' (non-capturing parentheses quantified as greedy + // and infinite), and 0 based greedy quantified parentheses. 
// Alternatives will use the 'Simple' set of ops if either the // subpattern is terminal (in which case we will never need to // backtrack), or if the subpattern only contains one alternative. @@ -2328,7 +3028,10 @@ class YarrGenerator : private DefaultMacroAssembler { // comes where the subpattern is capturing, in which case we would // need to restore the capture from the first subpattern upon a // failure in the second. - if (term->quantityCount == 1 && !term->parentheses.isCopy) { + if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) { + m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum; + return; + } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) { // Select the 'Once' nodes. parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; @@ -2344,9 +3047,31 @@ class YarrGenerator : private DefaultMacroAssembler { parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; } else { +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + // We only handle generic parenthesis with greedy counts. + if (term->quantityType != QuantifierGreedy) { + // This subpattern is not supported by the JIT. + m_failureReason = JITFailureReason::NonGreedyParenthesizedSubpattern; + return; + } + + m_containsNestedSubpatterns = true; + + // Select the 'Generic' nodes. + parenthesesBeginOpCode = OpParenthesesSubpatternBegin; + parenthesesEndOpCode = OpParenthesesSubpatternEnd; + + // If there is more than one alternative we cannot use the 'simple' nodes. + if (term->parentheses.disjunction->m_alternatives.size() != 1) { + alternativeBeginOpCode = OpNestedAlternativeBegin; + alternativeNextOpCode = OpNestedAlternativeNext; + alternativeEndOpCode = OpNestedAlternativeEnd; + } +#else // This subpattern is not supported by the JIT. 
- m_shouldFallBack = true; + m_failureReason = JITFailureReason::ParenthesizedSubpattern; return; +#endif } size_t parenBegin = m_ops.size(); @@ -2355,7 +3080,7 @@ class YarrGenerator : private DefaultMacroAssembler { m_ops.append(alternativeBeginOpCode); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2406,7 +3131,7 @@ class YarrGenerator : private DefaultMacroAssembler { m_ops.append(OpSimpleNestedAlternativeBegin); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2480,7 +3205,7 @@ class YarrGenerator : private DefaultMacroAssembler { // to return the failing result. void opCompileBody(PatternDisjunction* disjunction) { - Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives; size_t currentAlternativeIndex = 0; // Emit the 'once through' alternatives. 
@@ -2548,18 +3273,59 @@ class YarrGenerator : private DefaultMacroAssembler { lastOp.m_nextOp = repeatLoop; } + void generateTryReadUnicodeCharacterHelper() + { +#ifdef JIT_UNICODE_EXPRESSIONS + if (m_tryReadUnicodeCharacterCalls.isEmpty()) + return; + + ASSERT(m_decodeSurrogatePairs); + + m_tryReadUnicodeCharacterEntry = label(); + + tryReadUnicodeCharImpl(regT0); + + ret(); +#endif + } + void generateEnter() { #if CPU(X86_64) push(X86Registers::ebp); move(stackPointerRegister, X86Registers::ebp); - push(X86Registers::ebx); + + if (m_pattern.m_saveInitialStartValue) + push(X86Registers::ebx); + +#if OS(WINDOWS) + push(X86Registers::edi); +#endif +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) { +#if OS(WINDOWS) + push(X86Registers::esi); +#endif + push(X86Registers::r12); + } +#endif + + if (m_decodeSurrogatePairs) { + push(X86Registers::r13); + push(X86Registers::r14); + push(X86Registers::r15); + + move(TrustedImm32(0xd800), leadingSurrogateTag); + move(TrustedImm32(0xdc00), trailingSurrogateTag); + } // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. zeroExtend32ToPtr(index, index); zeroExtend32ToPtr(length, length); #if OS(WINDOWS) if (compileMode == IncludeSubpatterns) loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output); + // rcx is the pointer to the allocated space for result in x64 Windows. 
+ push(X86Registers::ecx); #endif #elif CPU(X86) push(X86Registers::ebp); @@ -2580,6 +3346,14 @@ class YarrGenerator : private DefaultMacroAssembler { loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); #endif #elif CPU(ARM64) + if (m_decodeSurrogatePairs) { + pushPair(framePointerRegister, linkRegister); + move(TrustedImm32(0x10000), supplementaryPlanesBase); + move(TrustedImm32(0xfffffc00), surrogateTagMask); + move(TrustedImm32(0xd800), leadingSurrogateTag); + move(TrustedImm32(0xdc00), trailingSurrogateTag); + } + // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. zeroExtend32ToPtr(index, index); zeroExtend32ToPtr(length, length); @@ -2587,45 +3361,60 @@ class YarrGenerator : private DefaultMacroAssembler { push(ARMRegisters::r4); push(ARMRegisters::r5); push(ARMRegisters::r6); -#if CPU(ARM_TRADITIONAL) - push(ARMRegisters::r8); // scratch register -#endif - if (compileMode == IncludeSubpatterns) - move(ARMRegisters::r3, output); -#elif CPU(SH4) - push(SH4Registers::r11); - push(SH4Registers::r13); + push(ARMRegisters::r8); #elif CPU(MIPS) // Do nothing. #endif + + store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT); } void generateReturn() { + store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT); + #if CPU(X86_64) #if OS(WINDOWS) // Store the return value in the allocated space pointed by rcx. 
+ pop(X86Registers::ecx); store64(returnRegister, Address(X86Registers::ecx)); store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*))); move(X86Registers::ecx, returnRegister); #endif - pop(X86Registers::ebx); + if (m_decodeSurrogatePairs) { + pop(X86Registers::r15); + pop(X86Registers::r14); + pop(X86Registers::r13); + } + +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) { + pop(X86Registers::r12); +#if OS(WINDOWS) + pop(X86Registers::esi); +#endif + } +#endif +#if OS(WINDOWS) + pop(X86Registers::edi); +#endif + + if (m_pattern.m_saveInitialStartValue) + pop(X86Registers::ebx); pop(X86Registers::ebp); #elif CPU(X86) pop(X86Registers::esi); pop(X86Registers::edi); pop(X86Registers::ebx); pop(X86Registers::ebp); +#elif CPU(ARM64) + if (m_decodeSurrogatePairs) + popPair(framePointerRegister, linkRegister); #elif CPU(ARM) -#if CPU(ARM_TRADITIONAL) - pop(ARMRegisters::r8); // scratch register -#endif + pop(ARMRegisters::r8); pop(ARMRegisters::r6); pop(ARMRegisters::r5); pop(ARMRegisters::r4); -#elif CPU(SH4) - pop(SH4Registers::r13); - pop(SH4Registers::r11); #elif CPU(MIPS) // Do nothing #endif @@ -2633,25 +3422,57 @@ class YarrGenerator : private DefaultMacroAssembler { } public: - YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) - : m_pattern(pattern) + YarrGenerator(VM* vm, YarrPattern& pattern, YarrCodeBlock& codeBlock, YarrCharSize charSize) + : m_vm(vm) + , m_pattern(pattern) + , m_codeBlock(codeBlock) , m_charSize(charSize) - , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo) - , m_shouldFallBack(false) - , m_checked(0) + , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode()) + , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase()) + , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2) +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + , m_containsNestedSubpatterns(false) + , m_parenContextSizes(compileMode == IncludeSubpatterns ? 
m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize) +#endif { } - void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) + void compile() { + YarrCodeBlock& codeBlock = m_codeBlock; + +#ifndef JIT_UNICODE_EXPRESSIONS + if (m_decodeSurrogatePairs) { + codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair); + return; + } +#endif + +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) + codeBlock.setUsesPaternContextBuffer(); +#endif + + // We need to compile before generating code since we set flags based on compilation that + // are used during generation. + opCompileBody(m_pattern.m_body); + + if (m_failureReason) { + codeBlock.setFallBackWithFailureReason(*m_failureReason); + return; + } + generateEnter(); Jump hasInput = checkInput(); - move(TrustedImmPtr((void*)WTF::notFound), returnRegister); - move(TrustedImm32(0), returnRegister2); - generateReturn(); + generateFailReturn(); hasInput.link(this); +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) + move(TrustedImm32(matchLimit), remainingMatchCount); +#endif + if (compileMode == IncludeSubpatterns) { for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i) store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int))); @@ -2662,47 +3483,80 @@ public: initCallFrame(); - // Compile the pattern to the internal 'YarrOp' representation. - opCompileBody(m_pattern.m_body); - - // If we encountered anything we can't handle in the JIT code - // (e.g. backreferences) then return early. - if (m_shouldFallBack) { - jitObject.setFallBack(true); - return; +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) + initParenContextFreeList(); +#endif + + if (m_pattern.m_saveInitialStartValue) { +#ifdef HAVE_INITIAL_START_REG + move(index, initialStart); +#else + storeToFrame(index, m_pattern.m_initialStartValueFrameLocation); +#endif } generate(); backtrack(); - // Link & finalize the code. 
- LinkBuffer<JSC::DefaultMacroAssembler> linkBuffer(*globalData, this, REGEXP_CODE_ID); + generateTryReadUnicodeCharacterHelper(); + + generateJITFailReturn(); + + JSGlobalData data(m_vm->regExpAllocator); + DefaultLinkBuffer linkBuffer(data, this, REGEXP_CODE_ID, JITCompilationCanFail); + if (linkBuffer.didFailToAllocate()) { + codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure); + return; + } + + if (!m_tryReadUnicodeCharacterCalls.isEmpty()) { + CodeLocationLabel tryReadUnicodeCharacterHelper = linkBuffer.locationOf(m_tryReadUnicodeCharacterEntry); + + for (auto call : m_tryReadUnicodeCharacterCalls) + linkBuffer.link(call, tryReadUnicodeCharacterHelper); + } + m_backtrackingState.linkDataLabels(linkBuffer); if (compileMode == MatchOnly) { if (m_charSize == Char8) - jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression"))); + codeBlock.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, "YarJIT", "Match-only 8-bit regular expression")); else - jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression"))); + codeBlock.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, "YarJIT", "Match-only 16-bit regular expression")); } else { if (m_charSize == Char8) - jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression"))); + codeBlock.set8BitCode(FINALIZE_CODE(linkBuffer, "YarJIT", "8-bit regular expression")); else - jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression"))); + codeBlock.set16BitCode(FINALIZE_CODE(linkBuffer, "YarJIT", "16-bit regular expression")); } - jitObject.setFallBack(m_shouldFallBack); + if (m_failureReason) + codeBlock.setFallBackWithFailureReason(*m_failureReason); } private: + VM* m_vm; + YarrPattern& m_pattern; + YarrCodeBlock& m_codeBlock; YarrCharSize m_charSize; - Scale m_charScale; - // Used to detect regular expression constructs that are not currently // supported in the JIT; fall back 
to the interpreter when this is detected. - bool m_shouldFallBack; + std::optional<JITFailureReason> m_failureReason; + + bool m_decodeSurrogatePairs; + bool m_unicodeIgnoreCase; + CanonicalMode m_canonicalMode; +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + bool m_containsNestedSubpatterns; + ParenContextSizes m_parenContextSizes; +#endif + JumpList m_abortExecution; + JumpList m_hitMatchLimit; + Vector<Call> m_tryReadUnicodeCharacterCalls; + Label m_tryReadUnicodeCharacterEntry; // The regular expression expressed as a linear sequence of operations. Vector<YarrOp, 128> m_ops; @@ -2717,18 +3571,47 @@ private: // FIXME: This should go away. Rather than tracking this value throughout // code generation, we should gather this information up front & store it // on the YarrOp structure. - int m_checked; + Checked<unsigned> m_checkedOffset; // This class records state whilst generating the backtracking path of code. BacktrackingState m_backtrackingState; }; -void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode) +static void dumpCompileFailure(JITFailureReason failure) +{ + switch (failure) { + case JITFailureReason::DecodeSurrogatePair: + dataLog("Can't JIT a pattern decoding surrogate pairs\n"); + break; + case JITFailureReason::BackReference: + dataLog("Can't JIT a pattern containing back references\n"); + break; + case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum: + dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n"); + break; + case JITFailureReason::ParenthesizedSubpattern: + dataLog("Can't JIT a pattern containing parenthesized subpatterns\n"); + break; + case JITFailureReason::NonGreedyParenthesizedSubpattern: + dataLog("Can't JIT a pattern containing non-greedy parenthesized subpatterns\n"); + break; + case JITFailureReason::ExecutableMemoryAllocationFailure: + dataLog("Can't JIT because of failure of allocation of 
executable memory\n"); + break; + } +} + +void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode) { if (mode == MatchOnly) - YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject); + YarrGenerator<MatchOnly>(vm, pattern, codeBlock, charSize).compile(); else - YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject); + YarrGenerator<IncludeSubpatterns>(vm, pattern, codeBlock, charSize).compile(); + + if (auto failureReason = codeBlock.failureReason()) { + if (Options::dumpCompiledRegExpPatterns()) + dumpCompileFailure(*failureReason); + } } }} diff --git a/src/3rdparty/masm/yarr/YarrJIT.h b/src/3rdparty/masm/yarr/YarrJIT.h index bb7033fdea..8b6b3a7577 100644 --- a/src/3rdparty/masm/yarr/YarrJIT.h +++ b/src/3rdparty/masm/yarr/YarrJIT.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009-2018 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,12 +23,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef YarrJIT_h -#define YarrJIT_h +#pragma once + +#include <Platform.h> #if ENABLE(YARR_JIT) -#include "JSGlobalData.h" #include "MacroAssemblerCodeRef.h" #include "MatchResult.h" #include "Yarr.h" @@ -40,19 +40,39 @@ #define YARR_CALL #endif +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) +constexpr size_t patternContextBufferSize = 8192; // Space caller allocates to save nested parenthesis context +#endif + namespace JSC { -class JSGlobalData; +class VM; class ExecutablePool; namespace Yarr { +enum class JITFailureReason : uint8_t { + DecodeSurrogatePair, + BackReference, + VariableCountedParenthesisWithNonZeroMinimum, + ParenthesizedSubpattern, + NonGreedyParenthesizedSubpattern, + ExecutableMemoryAllocationFailure, +}; + class YarrCodeBlock { -#if CPU(X86_64) +#if CPU(X86_64) || CPU(ARM64) +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL; + typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL; + typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL; + typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL; +#else typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL; typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL; typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length) YARR_CALL; +#endif #else 
typedef EncodedMatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL; typedef EncodedMatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; @@ -61,17 +81,10 @@ class YarrCodeBlock { #endif public: - YarrCodeBlock() - : m_needFallBack(false) - { - } - - ~YarrCodeBlock() - { - } + YarrCodeBlock() = default; - void setFallBack(bool fallback) { m_needFallBack = fallback; } - bool isFallBack() { return m_needFallBack; } + void setFallBackWithFailureReason(JITFailureReason failureReason) { m_failureReason = failureReason; } + std::optional<JITFailureReason> failureReason() { return m_failureReason; } bool has8BitCode() { return m_ref8.size(); } bool has16BitCode() { return m_ref16.size(); } @@ -83,6 +96,34 @@ public: void set8BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly8 = matchOnly; } void set16BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly16 = matchOnly; } +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + bool usesPatternContextBuffer() { return m_usesPatternContextBuffer; } + void setUsesPaternContextBuffer() { m_usesPatternContextBuffer = true; } + + MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) + { + ASSERT(has8BitCode()); + return MatchResult(reinterpret_cast<YarrJITCode8>(m_ref8.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize)); + } + + MatchResult execute(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) + { + ASSERT(has16BitCode()); + return MatchResult(reinterpret_cast<YarrJITCode16>(m_ref16.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize)); + } + + MatchResult execute(const LChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize) + { + 
ASSERT(has8BitCodeMatchOnly()); + return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly8>(m_matchOnly8.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize)); + } + + MatchResult execute(const UChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize) + { + ASSERT(has16BitCodeMatchOnly()); + return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize)); + } +#else MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output) { ASSERT(has8BitCode()); @@ -106,18 +147,54 @@ public: ASSERT(has16BitCodeMatchOnly()); return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length)); } +#endif #if ENABLE(REGEXP_TRACING) - void *getAddr() { return m_ref.code().executableAddress(); } + void *get8BitMatchOnlyAddr() + { + if (!has8BitCodeMatchOnly()) + return 0; + + return m_matchOnly8.code().executableAddress(); + } + + void *get16BitMatchOnlyAddr() + { + if (!has16BitCodeMatchOnly()) + return 0; + + return m_matchOnly16.code().executableAddress(); + } + + void *get8BitMatchAddr() + { + if (!has8BitCode()) + return 0; + + return m_ref8.code().executableAddress(); + } + + void *get16BitMatchAddr() + { + if (!has16BitCode()) + return 0; + + return m_ref16.code().executableAddress(); + } #endif + size_t size() const + { + return m_ref8.size() + m_ref16.size() + m_matchOnly8.size() + m_matchOnly16.size(); + } + void clear() { m_ref8 = MacroAssemblerCodeRef(); m_ref16 = MacroAssemblerCodeRef(); m_matchOnly8 = MacroAssemblerCodeRef(); m_matchOnly16 = MacroAssemblerCodeRef(); - m_needFallBack = false; + m_failureReason = std::nullopt; } private: @@ -125,17 +202,18 @@ private: MacroAssemblerCodeRef m_ref16; MacroAssemblerCodeRef m_matchOnly8; MacroAssemblerCodeRef m_matchOnly16; - bool m_needFallBack; +#if 
ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + bool m_usesPatternContextBuffer; +#endif + std::optional<JITFailureReason> m_failureReason; }; enum YarrJITCompileMode { MatchOnly, IncludeSubpatterns }; -void jitCompile(YarrPattern&, YarrCharSize, JSGlobalData*, YarrCodeBlock& jitObject, YarrJITCompileMode = IncludeSubpatterns); +void jitCompile(YarrPattern&, YarrCharSize, VM*, YarrCodeBlock& jitObject, YarrJITCompileMode = IncludeSubpatterns); } } // namespace JSC::Yarr #endif - -#endif // YarrJIT_h diff --git a/src/3rdparty/masm/yarr/YarrParser.h b/src/3rdparty/masm/yarr/YarrParser.h index 13ffd3a1d6..3e5311f1fb 100644 --- a/src/3rdparty/masm/yarr/YarrParser.h +++ b/src/3rdparty/masm/yarr/YarrParser.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2014-2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,46 +23,25 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef YarrParser_h -#define YarrParser_h +#pragma once #include "Yarr.h" +#include "YarrPattern.h" +#include "YarrUnicodeProperties.h" #include <wtf/ASCIICType.h> +#include <wtf/HashSet.h> +#include <wtf/Optional.h> +#include <wtf/text/StringBuilder.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { -#define REGEXP_ERROR_PREFIX "Invalid regular expression: " - -enum BuiltInCharacterClassID { - DigitClassID, - SpaceClassID, - WordClassID, - NewlineClassID, -}; - // The Parser class should not be used directly - only via the Yarr::parse() method. 
template<class Delegate, typename CharType> class Parser { private: template<class FriendDelegate> - friend const char* parse(FriendDelegate&, const String& pattern, unsigned backReferenceLimit); - - enum ErrorCode { - NoError, - PatternTooLarge, - QuantifierOutOfOrder, - QuantifierWithoutAtom, - QuantifierTooLarge, - MissingParentheses, - ParenthesesUnmatched, - ParenthesesTypeInvalid, - CharacterClassUnmatched, - CharacterClassOutOfOrder, - EscapeUnterminated, - NumberOfErrorCodes - }; + friend ErrorCode parse(FriendDelegate&, const String& pattern, bool isUnicode, unsigned backReferenceLimit); /* * CharacterClassParserDelegate: @@ -77,7 +56,7 @@ private: public: CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err) : m_delegate(delegate) - , m_err(err) + , m_errorCode(err) , m_state(Empty) , m_character(0) { @@ -102,7 +81,7 @@ private: * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/ * is different to /[a\-z]/). */ - void atomPatternCharacter(UChar ch, bool hyphenIsRange = false) + void atomPatternCharacter(UChar32 ch, bool hyphenIsRange = false) { switch (m_state) { case AfterCharacterClass: @@ -118,7 +97,8 @@ private: m_state = AfterCharacterClassHyphen; return; } - Q_FALLTHROUGH(); // cached character, so treat this as Empty. + // Otherwise just fall through - cached character so treat this as Empty. + FALLTHROUGH; case Empty: m_character = ch; @@ -136,7 +116,7 @@ private: case CachedCharacterHyphen: if (ch < m_character) { - m_err = CharacterClassOutOfOrder; + m_errorCode = ErrorCode::CharacterClassOutOfOrder; return; } m_delegate.atomCharacterClassRange(m_character, ch); @@ -168,8 +148,7 @@ private: case CachedCharacter: // Flush the currently cached character, then fall through. 
m_delegate.atomCharacterClassAtom(m_character); - Q_FALLTHROUGH(); - + FALLTHROUGH; case Empty: case AfterCharacterClass: m_state = AfterCharacterClass; @@ -187,7 +166,7 @@ private: case CachedCharacterHyphen: m_delegate.atomCharacterClassAtom(m_character); m_delegate.atomCharacterClassAtom('-'); - // fall through + FALLTHROUGH; case AfterCharacterClassHyphen: m_delegate.atomCharacterClassBuiltIn(classID, invert); m_state = Empty; @@ -215,10 +194,11 @@ private: // invoked with inCharacterClass set. NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); } NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); } + NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(String) { RELEASE_ASSERT_NOT_REACHED(); } private: Delegate& m_delegate; - ErrorCode& m_err; + ErrorCode& m_errorCode; enum CharacterClassConstructionState { Empty, CachedCharacter, @@ -226,20 +206,31 @@ private: AfterCharacterClass, AfterCharacterClassHyphen, } m_state; - UChar m_character; + UChar32 m_character; }; - Parser(Delegate& delegate, const String& pattern, unsigned backReferenceLimit) + Parser(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit) : m_delegate(delegate) , m_backReferenceLimit(backReferenceLimit) - , m_err(NoError) - , m_data(pattern.getCharacters<CharType>()) + , m_data(pattern.characters<CharType>()) , m_size(pattern.length()) - , m_index(0) - , m_parenthesesNestingDepth(0) + , m_isUnicode(isUnicode) { } + // The handling of IdentityEscapes is different depending on the unicode flag. + // For Unicode patterns, IdentityEscapes only include SyntaxCharacters or '/'. + // For non-unicode patterns, most any character can be escaped. 
+ bool isIdentityEscapeAnError(int ch) + { + if (m_isUnicode && !strchr("^$\\.*+?()[]{}|/", ch)) { + m_errorCode = ErrorCode::InvalidIdentityEscape; + return true; + } + + return false; + } + /* * parseEscape(): * @@ -263,12 +254,12 @@ private: template<bool inCharacterClass, class EscapeDelegate> bool parseEscape(EscapeDelegate& delegate) { - ASSERT(!m_err); + ASSERT(!hasError(m_errorCode)); ASSERT(peek() == '\\'); consume(); if (atEndOfPattern()) { - m_err = EscapeUnterminated; + m_errorCode = ErrorCode::EscapeUnterminated; return false; } @@ -276,18 +267,24 @@ private: // Assertions case 'b': consume(); - if (inCharacterClass) + if (inCharacterClass) { + if (isIdentityEscapeAnError('b')) + break; + delegate.atomPatternCharacter('\b'); - else { + } else { delegate.assertionWordBoundary(false); return false; } break; case 'B': consume(); - if (inCharacterClass) + if (inCharacterClass) { + if (isIdentityEscapeAnError('B')) + break; + delegate.atomPatternCharacter('B'); - else { + } else { delegate.assertionWordBoundary(true); return false; } @@ -296,27 +293,27 @@ private: // CharacterClassEscape case 'd': consume(); - delegate.atomBuiltInCharacterClass(DigitClassID, false); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::DigitClassID, false); break; case 's': consume(); - delegate.atomBuiltInCharacterClass(SpaceClassID, false); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::SpaceClassID, false); break; case 'w': consume(); - delegate.atomBuiltInCharacterClass(WordClassID, false); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::WordClassID, false); break; case 'D': consume(); - delegate.atomBuiltInCharacterClass(DigitClassID, true); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::DigitClassID, true); break; case 'S': consume(); - delegate.atomBuiltInCharacterClass(SpaceClassID, true); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::SpaceClassID, true); break; case 'W': consume(); - 
delegate.atomBuiltInCharacterClass(WordClassID, true); + delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::WordClassID, true); break; // DecimalEscape @@ -341,15 +338,22 @@ private: } restoreState(state); + + if (m_isUnicode) { + m_errorCode = ErrorCode::InvalidBackreference; + return false; + } } - - // Not a backreference, and not octal. + + // Not a backreference, and not octal. Just a number. if (peek() >= '8') { - delegate.atomPatternCharacter('\\'); + delegate.atomPatternCharacter(consume()); break; } + + // Fall-through to handle this as an octal escape. + FALLTHROUGH; } - Q_FALLTHROUGH(); // Handle this as an octal escape. // Octal escape case '0': @@ -400,32 +404,161 @@ private: case 'x': { consume(); int x = tryConsumeHex(2); - if (x == -1) + if (x == -1) { + if (isIdentityEscapeAnError('x')) + break; + delegate.atomPatternCharacter('x'); - else + } else delegate.atomPatternCharacter(x); break; } + // Named backreference + case 'k': { + consume(); + ParseState state = saveState(); + if (!atEndOfPattern() && !inCharacterClass) { + if (consume() == '<') { + auto groupName = tryConsumeGroupName(); + if (groupName && m_captureGroupNames.contains(groupName.value())) { + delegate.atomNamedBackReference(groupName.value()); + break; + } + if (m_isUnicode) { + m_errorCode = ErrorCode::InvalidBackreference; + break; + } + } + } + restoreState(state); + delegate.atomPatternCharacter('k'); + break; + } + + // Unicode property escapes + case 'p': + case 'P': { + int escapeChar = consume(); + + if (!m_isUnicode) { + if (isIdentityEscapeAnError(escapeChar)) + break; + delegate.atomPatternCharacter(escapeChar); + break; + } + + if (!atEndOfPattern() && peek() == '{') { + consume(); + auto optClassID = tryConsumeUnicodePropertyExpression(); + if (!optClassID) { + // tryConsumeUnicodePropertyExpression() will set m_errorCode for a malformed property expression + break; + } + delegate.atomBuiltInCharacterClass(optClassID.value(), escapeChar == 'P'); + } else + 
m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + break; + } + // UnicodeEscape case 'u': { consume(); + if (atEndOfPattern()) { + if (isIdentityEscapeAnError('u')) + break; + + delegate.atomPatternCharacter('u'); + break; + } + + if (m_isUnicode && peek() == '{') { + consume(); + UChar32 codePoint = 0; + do { + if (atEndOfPattern() || !isASCIIHexDigit(peek())) { + m_errorCode = ErrorCode::InvalidUnicodeEscape; + break; + } + + codePoint = (codePoint << 4) | toASCIIHexValue(consume()); + + if (codePoint > UCHAR_MAX_VALUE) + m_errorCode = ErrorCode::InvalidUnicodeEscape; + } while (!atEndOfPattern() && peek() != '}'); + if (!atEndOfPattern() && peek() == '}') + consume(); + else if (!hasError(m_errorCode)) + m_errorCode = ErrorCode::InvalidUnicodeEscape; + if (hasError(m_errorCode)) + return false; + + delegate.atomPatternCharacter(codePoint); + break; + } int u = tryConsumeHex(4); - if (u == -1) + if (u == -1) { + if (isIdentityEscapeAnError('u')) + break; + delegate.atomPatternCharacter('u'); - else + } else { + // If we have the first of a surrogate pair, look for the second. + if (U16_IS_LEAD(u) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\') { + ParseState state = saveState(); + consume(); + + if (tryConsume('u')) { + int surrogate2 = tryConsumeHex(4); + if (U16_IS_TRAIL(surrogate2)) { + u = U16_GET_SUPPLEMENTARY(u, surrogate2); + delegate.atomPatternCharacter(u); + break; + } + } + + restoreState(state); + } delegate.atomPatternCharacter(u); + } break; } // IdentityEscape default: + int ch = peek(); + + if (ch == '-' && m_isUnicode && inCharacterClass) { + // \- is allowed for ClassEscape with unicode flag. 
+ delegate.atomPatternCharacter(consume()); + break; + } + + if (isIdentityEscapeAnError(ch)) + break; + delegate.atomPatternCharacter(consume()); } return true; } + UChar32 consumePossibleSurrogatePair() + { + UChar32 ch = consume(); + if (U16_IS_LEAD(ch) && m_isUnicode && (patternRemaining() > 0)) { + ParseState state = saveState(); + + UChar32 surrogate2 = consume(); + if (U16_IS_TRAIL(surrogate2)) + ch = U16_GET_SUPPLEMENTARY(ch, surrogate2); + else + restoreState(state); + } + + return ch; + } + /* * parseAtomEscape(), parseCharacterClassEscape(): * @@ -449,11 +582,11 @@ private: */ void parseCharacterClass() { - ASSERT(!m_err); + ASSERT(!hasError(m_errorCode)); ASSERT(peek() == '['); consume(); - CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err); + CharacterClassParserDelegate characterClassConstructor(m_delegate, m_errorCode); characterClassConstructor.begin(tryConsume('^')); @@ -469,14 +602,14 @@ private: break; default: - characterClassConstructor.atomPatternCharacter(consume(), true); + characterClassConstructor.atomPatternCharacter(consumePossibleSurrogatePair(), true); } - if (m_err) + if (hasError(m_errorCode)) return; } - m_err = CharacterClassUnmatched; + m_errorCode = ErrorCode::CharacterClassUnmatched; } /* @@ -486,13 +619,13 @@ private: */ void parseParenthesesBegin() { - ASSERT(!m_err); + ASSERT(!hasError(m_errorCode)); ASSERT(peek() == '('); consume(); if (tryConsume('?')) { if (atEndOfPattern()) { - m_err = ParenthesesTypeInvalid; + m_errorCode = ErrorCode::ParenthesesTypeInvalid; return; } @@ -508,9 +641,23 @@ private: case '!': m_delegate.atomParentheticalAssertionBegin(true); break; - + + case '<': { + auto groupName = tryConsumeGroupName(); + if (groupName) { + auto setAddResult = m_captureGroupNames.add(groupName.value()); + if (setAddResult.isNewEntry) + m_delegate.atomParenthesesSubpatternBegin(true, groupName); + else + m_errorCode = ErrorCode::DuplicateGroupName; + } else + m_errorCode = 
ErrorCode::InvalidGroupName; + + break; + } + default: - m_err = ParenthesesTypeInvalid; + m_errorCode = ErrorCode::ParenthesesTypeInvalid; } } else m_delegate.atomParenthesesSubpatternBegin(); @@ -525,14 +672,14 @@ private: */ void parseParenthesesEnd() { - ASSERT(!m_err); + ASSERT(!hasError(m_errorCode)); ASSERT(peek() == ')'); consume(); if (m_parenthesesNestingDepth > 0) m_delegate.atomParenthesesEnd(); else - m_err = ParenthesesUnmatched; + m_errorCode = ErrorCode::ParenthesesUnmatched; --m_parenthesesNestingDepth; } @@ -544,18 +691,18 @@ private: */ void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max) { - ASSERT(!m_err); + ASSERT(!hasError(m_errorCode)); ASSERT(min <= max); if (min == UINT_MAX) { - m_err = QuantifierTooLarge; + m_errorCode = ErrorCode::QuantifierTooLarge; return; } if (lastTokenWasAnAtom) m_delegate.quantifyAtom(min, max, !tryConsume('?')); else - m_err = QuantifierWithoutAtom; + m_errorCode = ErrorCode::QuantifierWithoutAtom; } /* @@ -603,7 +750,7 @@ private: case '.': consume(); - m_delegate.atomBuiltInCharacterClass(NewlineClassID, true); + m_delegate.atomBuiltInCharacterClass(BuiltInCharacterClassID::DotClassID, false); lastTokenWasAnAtom = true; break; @@ -649,7 +796,7 @@ private: if (min <= max) parseQuantifier(lastTokenWasAnAtom, min, max); else - m_err = QuantifierOutOfOrder; + m_errorCode = ErrorCode::QuantifierOutOfOrder; lastTokenWasAnAtom = false; break; } @@ -657,51 +804,36 @@ private: restoreState(state); } - Q_FALLTHROUGH(); // if we did not find a complete quantifer, fall through to the default case. + // if we did not find a complete quantifer, fall through to the default case. 
+ FALLTHROUGH; default: - m_delegate.atomPatternCharacter(consume()); + m_delegate.atomPatternCharacter(consumePossibleSurrogatePair()); lastTokenWasAnAtom = true; } - if (m_err) + if (hasError(m_errorCode)) return; } if (m_parenthesesNestingDepth > 0) - m_err = MissingParentheses; + m_errorCode = ErrorCode::MissingParentheses; } /* * parse(): * - * This method calls parseTokens() to parse over the input and converts any - * error code to a const char* for a result. + * This method calls parseTokens() to parse over the input and returns error code for a result. */ - const char* parse() + ErrorCode parse() { if (m_size > MAX_PATTERN_SIZE) - m_err = PatternTooLarge; + m_errorCode = ErrorCode::PatternTooLarge; else parseTokens(); - ASSERT(atEndOfPattern() || m_err); - - // The order of this array must match the ErrorCode enum. - static const char* errorMessages[NumberOfErrorCodes] = { - 0, // NoError - REGEXP_ERROR_PREFIX "regular expression too large", - REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", - REGEXP_ERROR_PREFIX "nothing to repeat", - REGEXP_ERROR_PREFIX "number too large in {} quantifier", - REGEXP_ERROR_PREFIX "missing )", - REGEXP_ERROR_PREFIX "unmatched parentheses", - REGEXP_ERROR_PREFIX "unrecognized character after (?", - REGEXP_ERROR_PREFIX "missing terminating ] for character class", - REGEXP_ERROR_PREFIX "range out of order in character class", - REGEXP_ERROR_PREFIX "\\ at end of pattern" - }; - - return errorMessages[m_err]; + ASSERT(atEndOfPattern() || hasError(m_errorCode)); + + return m_errorCode; } // Misc helper functions: @@ -724,6 +856,12 @@ private: return m_index == m_size; } + unsigned patternRemaining() + { + ASSERT(m_index <= m_size); + return m_size - m_index; + } + int peek() { ASSERT(m_index < m_size); @@ -741,6 +879,87 @@ private: return peek() - '0'; } + int tryConsumeUnicodeEscape() + { + if (!tryConsume('u')) + return -1; + + if (m_isUnicode && tryConsume('{')) { + int codePoint = 0; + do { + if (atEndOfPattern() 
|| !isASCIIHexDigit(peek())) { + m_errorCode = ErrorCode::InvalidUnicodeEscape; + return -1; + } + + codePoint = (codePoint << 4) | toASCIIHexValue(consume()); + + if (codePoint > UCHAR_MAX_VALUE) { + m_errorCode = ErrorCode::InvalidUnicodeEscape; + return -1; + } + } while (!atEndOfPattern() && peek() != '}'); + if (!atEndOfPattern() && peek() == '}') + consume(); + else if (!hasError(m_errorCode)) + m_errorCode = ErrorCode::InvalidUnicodeEscape; + if (hasError(m_errorCode)) + return -1; + + return codePoint; + } + + int u = tryConsumeHex(4); + if (u == -1) + return -1; + + // If we have the first of a surrogate pair, look for the second. + if (U16_IS_LEAD(u) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\') { + ParseState state = saveState(); + consume(); + + if (tryConsume('u')) { + int surrogate2 = tryConsumeHex(4); + if (U16_IS_TRAIL(surrogate2)) { + u = U16_GET_SUPPLEMENTARY(u, surrogate2); + return u; + } + } + + restoreState(state); + } + + return u; + } + + int tryConsumeIdentifierCharacter() + { + int ch = peek(); + + if (ch == '\\') { + consume(); + ch = tryConsumeUnicodeEscape(); + } else + consume(); + + return ch; + } + + bool isIdentifierStart(int ch) + { + return (WTF::isASCII(ch) && (WTF::isASCIIAlpha(ch) || ch == '_' || ch == '$')) || (U_GET_GC_MASK(ch) & U_GC_L_MASK); + } + + bool isIdentifierPart(int ch) + { + return (WTF::isASCII(ch) && (WTF::isASCIIAlpha(ch) || ch == '_' || ch == '$')) || (U_GET_GC_MASK(ch) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || ch == 0x200C || ch == 0x200D; + } + + bool isUnicodePropertyValueExpressionChar(int ch) + { + return WTF::isASCIIAlphanumeric(ch) || ch == '_' || ch == '='; + } + int consume() { ASSERT(m_index < m_size); @@ -755,13 +974,10 @@ private: unsigned consumeNumber() { - unsigned n = consumeDigit(); - // check for overflow. 
- for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) { - n = newValue; - consume(); - } - return n; + Checked<unsigned, RecordOverflow> n = consumeDigit(); + while (peekIsDigit()) + n = n * 10 + consumeDigit(); + return n.hasOverflowed() ? quantifyInfinite : n.unsafeGet(); } unsigned consumeOctal() @@ -797,13 +1013,99 @@ private: return n; } + std::optional<String> tryConsumeGroupName() + { + if (atEndOfPattern()) + return std::nullopt; + + ParseState state = saveState(); + + int ch = tryConsumeIdentifierCharacter(); + + if (isIdentifierStart(ch)) { + StringBuilder identifierBuilder; + identifierBuilder.append(ch); + + while (!atEndOfPattern()) { + ch = tryConsumeIdentifierCharacter(); + if (ch == '>') + return std::optional<String>(identifierBuilder.toString()); + + if (!isIdentifierPart(ch)) + break; + + identifierBuilder.append(ch); + } + } + + restoreState(state); + + return std::nullopt; + } + + std::optional<BuiltInCharacterClassID> tryConsumeUnicodePropertyExpression() + { + if (atEndOfPattern() || !isUnicodePropertyValueExpressionChar(peek())) { + m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + return std::nullopt; + } + + StringBuilder expressionBuilder; + String unicodePropertyName; + bool foundEquals = false; + unsigned errors = 0; + + expressionBuilder.append(consume()); + + while (!atEndOfPattern()) { + int ch = peek(); + if (ch == '}') { + consume(); + if (errors) { + m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + return std::nullopt; + } + + if (foundEquals) { + auto result = unicodeMatchPropertyValue(unicodePropertyName, expressionBuilder.toString()); + if (!result) + m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + return result; + } + + auto result = unicodeMatchProperty(expressionBuilder.toString()); + if (!result) + m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + return result; + } + + consume(); + if (ch == '=') { + if (!foundEquals) { + foundEquals = true; + 
unicodePropertyName = expressionBuilder.toString(); + expressionBuilder.clear(); + } else + errors++; + } else if (!isUnicodePropertyValueExpressionChar(ch)) + errors++; + else + expressionBuilder.append(ch); + } + + m_errorCode = ErrorCode::InvalidUnicodePropertyExpression; + return std::nullopt; + } + Delegate& m_delegate; unsigned m_backReferenceLimit; - ErrorCode m_err; + ErrorCode m_errorCode { ErrorCode::NoError }; const CharType* m_data; unsigned m_size; - unsigned m_index; - unsigned m_parenthesesNestingDepth; + unsigned m_index { 0 }; + bool m_isUnicode; + unsigned m_parenthesesNestingDepth { 0 }; + HashSet<String> m_captureGroupNames; // Derived by empirical testing of compile time in PCRE and WREC. static const unsigned MAX_PATTERN_SIZE = 1024 * 1024; @@ -823,17 +1125,18 @@ private: * void assertionEOL(); * void assertionWordBoundary(bool invert); * - * void atomPatternCharacter(UChar ch); + * void atomPatternCharacter(UChar32 ch); * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert); * void atomCharacterClassBegin(bool invert) - * void atomCharacterClassAtom(UChar ch) - * void atomCharacterClassRange(UChar begin, UChar end) + * void atomCharacterClassAtom(UChar32 ch) + * void atomCharacterClassRange(UChar32 begin, UChar32 end) * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) * void atomCharacterClassEnd() - * void atomParenthesesSubpatternBegin(bool capture = true); + * void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> groupName); * void atomParentheticalAssertionBegin(bool invert = false); * void atomParenthesesEnd(); * void atomBackReference(unsigned subpatternId); + * void atomNamedBackReference(String subpatternName); * * void quantifyAtom(unsigned min, unsigned max, bool greedy); * @@ -869,13 +1172,11 @@ private: */ template<class Delegate> -const char* parse(Delegate& delegate, const String& pattern, unsigned backReferenceLimit = quantifyInfinite) +ErrorCode 
parse(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit = quantifyInfinite) { if (pattern.is8Bit()) - return Parser<Delegate, LChar>(delegate, pattern, backReferenceLimit).parse(); - return Parser<Delegate, UChar>(delegate, pattern, backReferenceLimit).parse(); + return Parser<Delegate, LChar>(delegate, pattern, isUnicode, backReferenceLimit).parse(); + return Parser<Delegate, UChar>(delegate, pattern, isUnicode, backReferenceLimit).parse(); } } } // namespace JSC::Yarr - -#endif // YarrParser_h diff --git a/src/3rdparty/masm/yarr/YarrPattern.cpp b/src/3rdparty/masm/yarr/YarrPattern.cpp index c7e5b6b09b..ac66ea1b9a 100644 --- a/src/3rdparty/masm/yarr/YarrPattern.cpp +++ b/src/3rdparty/masm/yarr/YarrPattern.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2013-2016 Apple Inc. All rights reserved. * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without @@ -27,10 +27,15 @@ #include "config.h" #include "YarrPattern.h" +#include "Options.h" #include "Yarr.h" -#include "YarrCanonicalizeUCS2.h" +#include "YarrCanonicalize.h" #include "YarrParser.h" +#include <wtf/DataLog.h> +#include <wtf/Optional.h> +//#include <wtf/Threading.h> #include <wtf/Vector.h> +#include <wtf/text/WTFString.h> using namespace WTF; @@ -40,8 +45,11 @@ namespace JSC { namespace Yarr { class CharacterClassConstructor { public: - CharacterClassConstructor(bool isCaseInsensitive = false) + CharacterClassConstructor(bool isCaseInsensitive, CanonicalMode canonicalMode) : m_isCaseInsensitive(isCaseInsensitive) + , m_hasNonBMPCharacters(false) + , m_anyCharacter(false) + , m_canonicalMode(canonicalMode) { } @@ -51,6 +59,8 @@ public: m_ranges.clear(); m_matchesUnicode.clear(); m_rangesUnicode.clear(); + m_hasNonBMPCharacters = false; + m_anyCharacter = false; } void append(const CharacterClass* other) @@ -65,11 +75,71 
@@ public: addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end); } - void putChar(UChar ch) + void appendInverted(const CharacterClass* other) { - // Handle ascii cases. - if (ch <= 0x7f) { - if (m_isCaseInsensitive && isASCIIAlpha(ch)) { + auto addSortedInverted = [&](UChar32 min, UChar32 max, + const Vector<UChar32>& srcMatches, const Vector<CharacterRange>& srcRanges, + Vector<UChar32>& destMatches, Vector<CharacterRange>& destRanges) { + + auto addSortedMatchOrRange = [&](UChar32 lo, UChar32 hiPlusOne) { + if (lo < hiPlusOne) { + if (lo + 1 == hiPlusOne) + addSorted(destMatches, lo); + else + addSortedRange(destRanges, lo, hiPlusOne - 1); + } + }; + + UChar32 lo = min; + size_t matchesIndex = 0; + size_t rangesIndex = 0; + bool matchesRemaining = matchesIndex < srcMatches.size(); + bool rangesRemaining = rangesIndex < srcRanges.size(); + + if (!matchesRemaining && !rangesRemaining) { + addSortedMatchOrRange(min, max + 1); + return; + } + + while (matchesRemaining || rangesRemaining) { + UChar32 hiPlusOne; + UChar32 nextLo; + + if (matchesRemaining + && (!rangesRemaining || srcMatches[matchesIndex] < srcRanges[rangesIndex].begin)) { + hiPlusOne = srcMatches[matchesIndex]; + nextLo = hiPlusOne + 1; + ++matchesIndex; + matchesRemaining = matchesIndex < srcMatches.size(); + } else { + hiPlusOne = srcRanges[rangesIndex].begin; + nextLo = srcRanges[rangesIndex].end + 1; + ++rangesIndex; + rangesRemaining = rangesIndex < srcRanges.size(); + } + + addSortedMatchOrRange(lo, hiPlusOne); + + lo = nextLo; + } + + addSortedMatchOrRange(lo, max + 1); + }; + + addSortedInverted(0, 0x7f, other->m_matches, other->m_ranges, m_matches, m_ranges); + addSortedInverted(0x80, 0x10ffff, other->m_matchesUnicode, other->m_rangesUnicode, m_matchesUnicode, m_rangesUnicode); + } + + void putChar(UChar32 ch) + { + if (!m_isCaseInsensitive) { + addSorted(ch); + return; + } + + if (m_canonicalMode == CanonicalMode::UCS2 && isASCII(ch)) { + // Handle 
ASCII cases. + if (isASCIIAlpha(ch)) { addSorted(m_matches, toASCIIUpper(ch)); addSorted(m_matches, toASCIILower(ch)); } else @@ -77,40 +147,33 @@ public: return; } - // Simple case, not a case-insensitive match. - if (!m_isCaseInsensitive) { - addSorted(m_matchesUnicode, ch); - return; - } - // Add multiple matches, if necessary. - UCS2CanonicalizationRange* info = rangeInfoFor(ch); + const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_canonicalMode); if (info->type == CanonicalizeUnique) - addSorted(m_matchesUnicode, ch); + addSorted(ch); else putUnicodeIgnoreCase(ch, info); } - void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info) + void putUnicodeIgnoreCase(UChar32 ch, const CanonicalizationRange* info) { ASSERT(m_isCaseInsensitive); - ASSERT(ch > 0x7f); ASSERT(ch >= info->begin && ch <= info->end); ASSERT(info->type != CanonicalizeUnique); if (info->type == CanonicalizeSet) { - for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) - addSorted(m_matchesUnicode, ch); + for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) + addSorted(ch); } else { - addSorted(m_matchesUnicode, ch); - addSorted(m_matchesUnicode, getCanonicalPair(info, ch)); + addSorted(ch); + addSorted(getCanonicalPair(info, ch)); } } - void putRange(UChar lo, UChar hi) + void putRange(UChar32 lo, UChar32 hi) { - if (lo <= 0x7f) { + if (isASCII(lo)) { char asciiLo = lo; - char asciiHi = std::min(hi, (UChar)0x7f); + char asciiHi = std::min(hi, (UChar32)0x7f); addSortedRange(m_ranges, lo, asciiHi); if (m_isCaseInsensitive) { @@ -120,19 +183,19 @@ public: addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a')); } } - if (hi <= 0x7f) + if (isASCII(hi)) return; - lo = std::max(lo, (UChar)0x80); + lo = std::max(lo, (UChar32)0x80); addSortedRange(m_rangesUnicode, lo, hi); if (!m_isCaseInsensitive) return; - UCS2CanonicalizationRange* info = rangeInfoFor(lo); + const 
CanonicalizationRange* info = canonicalRangeInfoFor(lo, m_canonicalMode); while (true) { // Handle the range [lo .. end] - UChar end = std::min<UChar>(info->end, hi); + UChar32 end = std::min<UChar32>(info->end, hi); switch (info->type) { case CanonicalizeUnique: @@ -140,7 +203,7 @@ public: break; case CanonicalizeSet: { UChar ch; - for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) + for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) addSorted(m_matchesUnicode, ch); break; } @@ -175,24 +238,38 @@ public: } - PassOwnPtr<CharacterClass> charClass() + std::unique_ptr<CharacterClass> charClass() { - OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass); + coalesceTables(); + + auto characterClass = std::make_unique<CharacterClass>(); characterClass->m_matches.swap(m_matches); characterClass->m_ranges.swap(m_ranges); characterClass->m_matchesUnicode.swap(m_matchesUnicode); characterClass->m_rangesUnicode.swap(m_rangesUnicode); + characterClass->m_hasNonBMPCharacters = hasNonBMPCharacters(); + characterClass->m_anyCharacter = anyCharacter(); + + m_hasNonBMPCharacters = false; + m_anyCharacter = false; - return characterClass.release(); + return characterClass; } private: - void addSorted(Vector<UChar>& matches, UChar ch) + void addSorted(UChar32 ch) + { + addSorted(isASCII(ch) ? m_matches : m_matchesUnicode, ch); + } + + void addSorted(Vector<UChar32>& matches, UChar32 ch) { unsigned pos = 0; - ASSERT(matches.size() <= UINT_MAX); - unsigned range = static_cast<unsigned>(matches.size()); + unsigned range = matches.size(); + + if (!U_IS_BMP(ch)) + m_hasNonBMPCharacters = true; // binary chop, find position to insert char. 
while (range) { @@ -201,9 +278,31 @@ private: int val = matches[pos+index] - ch; if (!val) return; - else if (val > 0) + else if (val > 0) { + if (val == 1) { + UChar32 lo = ch; + UChar32 hi = ch + 1; + matches.remove(pos + index); + if (pos + index > 0 && matches[pos + index - 1] == ch - 1) { + lo = ch - 1; + matches.remove(pos + index - 1); + } + addSortedRange(isASCII(ch) ? m_ranges : m_rangesUnicode, lo, hi); + return; + } range = index; - else { + } else { + if (val == -1) { + UChar32 lo = ch - 1; + UChar32 hi = ch; + matches.remove(pos + index); + if (pos + index + 1 < matches.size() && matches[pos + index + 1] == ch + 1) { + hi = ch + 1; + matches.remove(pos + index + 1); + } + addSortedRange(isASCII(ch) ? m_ranges : m_rangesUnicode, lo, hi); + return; + } pos += (index+1); range -= (index+1); } @@ -215,17 +314,19 @@ private: matches.insert(pos, ch); } - void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi) + void addSortedRange(Vector<CharacterRange>& ranges, UChar32 lo, UChar32 hi) { - ASSERT(ranges.size() <= UINT_MAX); - unsigned end = static_cast<unsigned>(ranges.size()); - + size_t end = ranges.size(); + + if (!U_IS_BMP(hi)) + m_hasNonBMPCharacters = true; + // Simple linear scan - I doubt there are that many ranges anyway... // feel free to fix this with something faster (eg binary chop). - for (unsigned i = 0; i < end; ++i) { + for (size_t i = 0; i < end; ++i) { // does the new range fall before the current position in the array if (hi < ranges[i].begin) { - // optional optimization: concatenate appending ranges? - may not be worthwhile. + // Concatenate appending ranges. 
if (hi == (ranges[i].begin - 1)) { ranges[i].begin = lo; return; @@ -233,7 +334,7 @@ private: ranges.insert(i, CharacterRange(lo, hi)); return; } - // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the beginning + // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining // If the new range start at or before the end of the last range, then the overlap (if it starts one after the // end of the last range they concatenate, which is just as good. if (lo <= (ranges[i].end + 1)) { @@ -241,18 +342,7 @@ private: ranges[i].begin = std::min(ranges[i].begin, lo); ranges[i].end = std::max(ranges[i].end, hi); - // now check if the new range can subsume any subsequent ranges. - unsigned next = i+1; - // each iteration of the loop we will either remove something from the list, or break the loop. - while (next < ranges.size()) { - if (ranges[next].begin <= (ranges[i].end + 1)) { - // the next entry now overlaps / concatenates this one. - ranges[i].end = std::max(ranges[i].end, ranges[next].end); - ranges.remove(next); - } else - break; - } - + mergeRangesFrom(ranges, i); return; } } @@ -261,25 +351,95 @@ private: ranges.append(CharacterRange(lo, hi)); } - bool m_isCaseInsensitive; + void mergeRangesFrom(Vector<CharacterRange>& ranges, size_t index) + { + size_t next = index + 1; + + // each iteration of the loop we will either remove something from the list, or break out of the loop. + while (next < ranges.size()) { + if (ranges[next].begin <= (ranges[index].end + 1)) { + // the next entry now overlaps / concatenates with this one. 
+ ranges[index].end = std::max(ranges[index].end, ranges[next].end); + ranges.remove(next); + } else + break; + } + + } + + void coalesceTables() + { + auto coalesceMatchesAndRanges = [&](Vector<UChar32>& matches, Vector<CharacterRange>& ranges) { + + size_t matchesIndex = 0; + size_t rangesIndex = 0; + + while (matchesIndex < matches.size() && rangesIndex < ranges.size()) { + while (matchesIndex < matches.size() && matches[matchesIndex] < ranges[rangesIndex].begin - 1) + matchesIndex++; + + if (matchesIndex < matches.size() && matches[matchesIndex] == ranges[rangesIndex].begin - 1) { + ranges[rangesIndex].begin = matches[matchesIndex]; + matches.remove(matchesIndex); + } + + while (matchesIndex < matches.size() && matches[matchesIndex] < ranges[rangesIndex].end + 1) + matchesIndex++; + + if (matchesIndex < matches.size()) { + if (matches[matchesIndex] == ranges[rangesIndex].end + 1) { + ranges[rangesIndex].end = matches[matchesIndex]; + matches.remove(matchesIndex); + + mergeRangesFrom(ranges, rangesIndex); + } else + matchesIndex++; + } + } + }; + + coalesceMatchesAndRanges(m_matches, m_ranges); + coalesceMatchesAndRanges(m_matchesUnicode, m_rangesUnicode); + + if (!m_matches.size() && !m_matchesUnicode.size() + && m_ranges.size() == 1 && m_rangesUnicode.size() == 1 + && m_ranges[0].begin == 0 && m_ranges[0].end == 0x7f + && m_rangesUnicode[0].begin == 0x80 && m_rangesUnicode[0].end == 0x10ffff) + m_anyCharacter = true; + } - Vector<UChar> m_matches; + bool hasNonBMPCharacters() + { + return m_hasNonBMPCharacters; + } + + bool anyCharacter() + { + return m_anyCharacter; + } + + bool m_isCaseInsensitive : 1; + bool m_hasNonBMPCharacters : 1; + bool m_anyCharacter : 1; + CanonicalMode m_canonicalMode; + + Vector<UChar32> m_matches; Vector<CharacterRange> m_ranges; - Vector<UChar> m_matchesUnicode; + Vector<UChar32> m_matchesUnicode; Vector<CharacterRange> m_rangesUnicode; }; class YarrPatternConstructor { public: - YarrPatternConstructor(YarrPattern& pattern) + 
YarrPatternConstructor(YarrPattern& pattern, void* stackLimit) : m_pattern(pattern) - , m_characterClassConstructor(pattern.m_ignoreCase) - , m_invertParentheticalAssertion(false) + , m_characterClassConstructor(pattern.ignoreCase(), pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2) + , m_stackLimit(stackLimit) { - OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); + auto body = std::make_unique<PatternDisjunction>(); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(body.release()); + m_pattern.m_disjunctions.append(WTFMove(body)); } ~YarrPatternConstructor() @@ -291,15 +451,15 @@ public: m_pattern.reset(); m_characterClassConstructor.reset(); - OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); + auto body = std::make_unique<PatternDisjunction>(); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(body.release()); + m_pattern.m_disjunctions.append(WTFMove(body)); } void assertionBOL() { - if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) { + if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) { m_alternative->m_startsWithBOL = true; m_alternative->m_containsBOL = true; m_pattern.m_containsBOL = true; @@ -315,41 +475,51 @@ public: m_alternative->m_terms.append(PatternTerm::WordBoundary(invert)); } - void atomPatternCharacter(UChar ch) + void atomPatternCharacter(UChar32 ch) { // We handle case-insensitive checking of unicode characters which do have both // cases by handling them as if they were defined using a CharacterClass. - if (!m_pattern.m_ignoreCase || isASCII(ch)) { + if (!m_pattern.ignoreCase() || (isASCII(ch) && !m_pattern.unicode())) { m_alternative->m_terms.append(PatternTerm(ch)); return; } - UCS2CanonicalizationRange* info = rangeInfoFor(ch); + const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_pattern.unicode() ? 
CanonicalMode::Unicode : CanonicalMode::UCS2); if (info->type == CanonicalizeUnique) { m_alternative->m_terms.append(PatternTerm(ch)); return; } m_characterClassConstructor.putUnicodeIgnoreCase(ch, info); - OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); + auto newCharacterClass = m_characterClassConstructor.charClass(); m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false)); - m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); + m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass)); } void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) { switch (classID) { - case DigitClassID: + case BuiltInCharacterClassID::DigitClassID: m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert)); break; - case SpaceClassID: + case BuiltInCharacterClassID::SpaceClassID: m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert)); break; - case WordClassID: - m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert)); + case BuiltInCharacterClassID::WordClassID: + if (m_pattern.unicode() && m_pattern.ignoreCase()) + m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), invert)); + else + m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert)); + break; + case BuiltInCharacterClassID::DotClassID: + ASSERT(!invert); + if (m_pattern.dotAll()) + m_alternative->m_terms.append(PatternTerm(m_pattern.anyCharacterClass(), false)); + else + m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), true)); break; - case NewlineClassID: - m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert)); + default: + m_alternative->m_terms.append(PatternTerm(m_pattern.unicodeCharacterClassFor(classID), invert)); break; } } @@ -359,64 +529,83 @@ public: m_invertCharacterClass = invert; } - void 
atomCharacterClassAtom(UChar ch) + void atomCharacterClassAtom(UChar32 ch) { m_characterClassConstructor.putChar(ch); } - void atomCharacterClassRange(UChar begin, UChar end) + void atomCharacterClassRange(UChar32 begin, UChar32 end) { m_characterClassConstructor.putRange(begin, end); } void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) { - ASSERT(classID != NewlineClassID); + ASSERT(classID != BuiltInCharacterClassID::DotClassID); switch (classID) { - case DigitClassID: + case BuiltInCharacterClassID::DigitClassID: m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass()); break; - case SpaceClassID: + case BuiltInCharacterClassID::SpaceClassID: m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass()); break; - case WordClassID: - m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass()); + case BuiltInCharacterClassID::WordClassID: + if (m_pattern.unicode() && m_pattern.ignoreCase()) + m_characterClassConstructor.append(invert ? m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass() : m_pattern.wordUnicodeIgnoreCaseCharCharacterClass()); + else + m_characterClassConstructor.append(invert ? 
m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass()); break; default: - RELEASE_ASSERT_NOT_REACHED(); + if (!invert) + m_characterClassConstructor.append(m_pattern.unicodeCharacterClassFor(classID)); + else + m_characterClassConstructor.appendInverted(m_pattern.unicodeCharacterClassFor(classID)); } } void atomCharacterClassEnd() { - OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); + auto newCharacterClass = m_characterClassConstructor.charClass(); + + if (!m_invertCharacterClass && newCharacterClass.get()->m_anyCharacter) { + m_alternative->m_terms.append(PatternTerm(m_pattern.anyCharacterClass(), false)); + return; + } m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass)); - m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); + m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass)); } - void atomParenthesesSubpatternBegin(bool capture = true) + void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> optGroupName = std::nullopt) { unsigned subpatternId = m_pattern.m_numSubpatterns + 1; - if (capture) + if (capture) { m_pattern.m_numSubpatterns++; + if (optGroupName) { + while (m_pattern.m_captureGroupNames.size() < subpatternId) + m_pattern.m_captureGroupNames.append(String()); + m_pattern.m_captureGroupNames.append(optGroupName.value()); + m_pattern.m_namedGroupToParenIndex.add(optGroupName.value(), subpatternId); + } + } else + ASSERT(!optGroupName); - OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); + auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false)); m_alternative = parenthesesDisjunction->addNewAlternative(); - m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); + 
m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction)); } void atomParentheticalAssertionBegin(bool invert = false) { - OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); + auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert)); m_alternative = parenthesesDisjunction->addNewAlternative(); m_invertParentheticalAssertion = invert; - m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); + m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction)); } void atomParenthesesEnd() @@ -429,8 +618,7 @@ public: PatternTerm& lastTerm = m_alternative->lastTerm(); - ASSERT(parenthesesDisjunction->m_alternatives.size() <= UINT_MAX); - unsigned numParenAlternatives = static_cast<unsigned>(parenthesesDisjunction->m_alternatives.size()); + unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size(); unsigned numBOLAnchoredAlts = 0; for (unsigned i = 0; i < numParenAlternatives; i++) { @@ -478,16 +666,22 @@ public: m_alternative->m_terms.append(PatternTerm(subpatternId)); } - // deep copy the argument disjunction. If filterStartsWithBOL is true, + void atomNamedBackReference(String subpatternName) + { + ASSERT(m_pattern.m_namedGroupToParenIndex.find(subpatternName) != m_pattern.m_namedGroupToParenIndex.end()); + atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName)); + } + + // deep copy the argument disjunction. If filterStartsWithBOL is true, // skip alternatives with m_startsWithBOL set true. 
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false) { - OwnPtr<PatternDisjunction> newDisjunction; + std::unique_ptr<PatternDisjunction> newDisjunction; for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { PatternAlternative* alternative = disjunction->m_alternatives[alt].get(); if (!filterStartsWithBOL || !alternative->m_startsWithBOL) { if (!newDisjunction) { - newDisjunction = adoptPtr(new PatternDisjunction()); + newDisjunction = std::make_unique<PatternDisjunction>(); newDisjunction->m_parent = disjunction->m_parent; } PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); @@ -501,7 +695,7 @@ public: return 0; PatternDisjunction* copiedDisjunction = newDisjunction.get(); - m_pattern.m_disjunctions.append(newDisjunction.release()); + m_pattern.m_disjunctions.append(WTFMove(newDisjunction)); return copiedDisjunction; } @@ -512,6 +706,7 @@ public: PatternTerm termCopy = term; termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL); + m_pattern.m_hasCopiedParenSubexpressions = true; return termCopy; } @@ -527,7 +722,7 @@ public: PatternTerm& term = m_alternative->lastTerm(); ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary); - ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)); + ASSERT(term.quantityMinCount == 1 && term.quantityMaxCount == 1 && term.quantityType == QuantifierFixedCount); if (term.type == PatternTerm::TypeParentheticalAssertion) { // If an assertion is quantified with a minimum count of zero, it can simply be removed. @@ -549,12 +744,12 @@ public: return; } - if (min == 0) - term.quantify(max, greedy ? 
QuantifierGreedy : QuantifierNonGreedy); - else if (min == max) - term.quantify(min, QuantifierFixedCount); + if (min == max) + term.quantify(min, max, QuantifierFixedCount); + else if (!min || (term.type == PatternTerm::TypeParenthesesSubpattern && m_pattern.m_hasCopiedParenSubexpressions)) + term.quantify(min, max, greedy ? QuantifierGreedy : QuantifierNonGreedy); else { - term.quantify(min, QuantifierFixedCount); + term.quantify(min, min, QuantifierFixedCount); m_alternative->m_terms.append(copyTerm(term)); // NOTE: this term is interesting from an analysis perspective, in that it can be ignored..... m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy); @@ -568,10 +763,14 @@ public: m_alternative = m_alternative->m_parent->addNewAlternative(); } - unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition) + ErrorCode setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition, unsigned& newCallFrameSize) WARN_UNUSED_RETURN { + if (UNLIKELY(!isSafeToRecurse())) + return ErrorCode::TooManyDisjunctions; + + ErrorCode error = ErrorCode::NoError; alternative->m_hasFixedSize = true; - Checked<unsigned> currentInputPosition = initialInputPosition; + Checked<unsigned, RecordOverflow> currentInputPosition = initialInputPosition; for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { PatternTerm& term = alternative->m_terms[i]; @@ -599,8 +798,14 @@ public: term.frameLocation = currentCallFrameSize; currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter; alternative->m_hasFixedSize = false; + } else if (m_pattern.unicode()) { + Checked<unsigned, RecordOverflow> tempCount = term.quantityMaxCount; + tempCount *= U16_LENGTH(term.patternCharacter); + if (tempCount.hasOverflowed()) + return ErrorCode::OffsetTooLarge; + currentInputPosition += tempCount; } 
else - currentInputPosition += term.quantityCount; + currentInputPosition += term.quantityMaxCount; break; case PatternTerm::TypeCharacterClass: @@ -609,29 +814,39 @@ public: term.frameLocation = currentCallFrameSize; currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; alternative->m_hasFixedSize = false; + } else if (m_pattern.unicode()) { + term.frameLocation = currentCallFrameSize; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; + currentInputPosition += term.quantityMaxCount; + alternative->m_hasFixedSize = false; } else - currentInputPosition += term.quantityCount; + currentInputPosition += term.quantityMaxCount; break; case PatternTerm::TypeParenthesesSubpattern: // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own. term.frameLocation = currentCallFrameSize; - if (term.quantityCount == 1 && !term.parentheses.isCopy) { - if (term.quantityType != QuantifierFixedCount) - currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet()); + if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) { + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce; + error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize); + if (hasError(error)) + return error; // If quantity is fixed, then pre-check its minimum size. 
if (term.quantityType == QuantifierFixedCount) currentInputPosition += term.parentheses.disjunction->m_minimumSize; term.inputPosition = currentInputPosition.unsafeGet(); } else if (term.parentheses.isTerminal) { currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet()); + error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize); + if (hasError(error)) + return error; term.inputPosition = currentInputPosition.unsafeGet(); } else { term.inputPosition = currentInputPosition.unsafeGet(); - setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition.unsafeGet()); currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses; + error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize); + if (hasError(error)) + return error; } // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length. 
alternative->m_hasFixedSize = false; @@ -640,35 +855,53 @@ public: case PatternTerm::TypeParentheticalAssertion: term.inputPosition = currentInputPosition.unsafeGet(); term.frameLocation = currentCallFrameSize; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition.unsafeGet()); + error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition.unsafeGet(), currentCallFrameSize); + if (hasError(error)) + return error; break; case PatternTerm::TypeDotStarEnclosure: + ASSERT(!m_pattern.m_saveInitialStartValue); alternative->m_hasFixedSize = false; term.inputPosition = initialInputPosition; + m_pattern.m_initialStartValueFrameLocation = currentCallFrameSize; + currentCallFrameSize += YarrStackSpaceForDotStarEnclosure; + m_pattern.m_saveInitialStartValue = true; break; } + if (currentInputPosition.hasOverflowed()) + return ErrorCode::OffsetTooLarge; } alternative->m_minimumSize = (currentInputPosition - initialInputPosition).unsafeGet(); - return currentCallFrameSize; + newCallFrameSize = currentCallFrameSize; + return error; } - unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition) + ErrorCode setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition, unsigned& callFrameSize) { + if (UNLIKELY(!isSafeToRecurse())) + return ErrorCode::TooManyDisjunctions; + if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1)) initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative; unsigned minimumInputSize = UINT_MAX; unsigned maximumCallFrameSize = 0; bool hasFixedSize = true; + ErrorCode error = ErrorCode::NoError; for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { 
PatternAlternative* alternative = disjunction->m_alternatives[alt].get(); - unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition); + unsigned currentAlternativeCallFrameSize; + error = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition, currentAlternativeCallFrameSize); + if (hasError(error)) + return error; minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize); maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize); hasFixedSize &= alternative->m_hasFixedSize; + if (alternative->m_minimumSize > INT_MAX) + m_pattern.m_containsUnsignedLengthPattern = true; } ASSERT(minimumInputSize != UINT_MAX); @@ -677,12 +910,15 @@ public: disjunction->m_hasFixedSize = hasFixedSize; disjunction->m_minimumSize = minimumInputSize; disjunction->m_callFrameSize = maximumCallFrameSize; - return maximumCallFrameSize; + callFrameSize = maximumCallFrameSize; + return error; } - void setupOffsets() + ErrorCode setupOffsets() { - setupDisjunctionOffsets(m_pattern.m_body, 0, 0); + // FIXME: Yarr should not use the stack to handle subpatterns (rdar://problem/26436314). + unsigned ignoredCallFrameSize; + return setupDisjunctionOffsets(m_pattern.m_body, 0, 0, ignoredCallFrameSize); } // This optimization identifies sets of parentheses that we will never need to backtrack. 
@@ -699,14 +935,15 @@ public: if (m_pattern.m_numSubpatterns) return; - Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; for (size_t i = 0; i < alternatives.size(); ++i) { Vector<PatternTerm>& terms = alternatives[i]->m_terms; if (terms.size()) { PatternTerm& term = terms.last(); if (term.type == PatternTerm::TypeParenthesesSubpattern && term.quantityType == QuantifierGreedy - && term.quantityCount == quantifyInfinite + && term.quantityMinCount == 0 + && term.quantityMaxCount == quantifyInfinite && !term.capture()) term.parentheses.isTerminal = true; } @@ -722,7 +959,7 @@ public: // At this point, this is only valid for non-multiline expressions. PatternDisjunction* disjunction = m_pattern.m_body; - if (!m_pattern.m_containsBOL || m_pattern.m_multiline) + if (!m_pattern.m_containsBOL || m_pattern.multiline()) return; PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true); @@ -740,11 +977,12 @@ public: } } - bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex) + bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t endIndex) { Vector<PatternTerm>& terms = alternative->m_terms; - for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) { + ASSERT(endIndex <= terms.size()); + for (size_t termIndex = firstTermIndex; termIndex < endIndex; ++termIndex) { PatternTerm& term = terms[termIndex]; if (term.m_capture) @@ -753,7 +991,7 @@ public: if (term.type == PatternTerm::TypeParenthesesSubpattern) { PatternDisjunction* nestedDisjunction = term.parentheses.disjunction; for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) { - if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1)) + if 
(containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size())) return true; } } @@ -769,16 +1007,17 @@ public: // beginning and the end of the match. void optimizeDotStarWrappedExpressions() { - Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; if (alternatives.size() != 1) return; + CharacterClass* dotCharacterClass = m_pattern.dotAll() ? m_pattern.anyCharacterClass() : m_pattern.newlineCharacterClass(); PatternAlternative* alternative = alternatives[0].get(); Vector<PatternTerm>& terms = alternative->m_terms; if (terms.size() >= 3) { bool startsWithBOL = false; bool endsWithEOL = false; - size_t termIndex, firstExpressionTerm, lastExpressionTerm; + size_t termIndex, firstExpressionTerm; termIndex = 0; if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) { @@ -787,7 +1026,10 @@ public: } PatternTerm& firstNonAnchorTerm = terms[termIndex]; - if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy))) + if (firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass + || firstNonAnchorTerm.characterClass != dotCharacterClass + || firstNonAnchorTerm.quantityMinCount + || firstNonAnchorTerm.quantityMaxCount != quantifyInfinite) return; firstExpressionTerm = termIndex + 1; @@ -799,16 +1041,19 @@ public: } PatternTerm& lastNonAnchorTerm = terms[termIndex]; - if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy)) + if (lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass + || lastNonAnchorTerm.characterClass != dotCharacterClass 
+ || lastNonAnchorTerm.quantityType != QuantifierGreedy + || lastNonAnchorTerm.quantityMinCount + || lastNonAnchorTerm.quantityMaxCount != quantifyInfinite) return; - - lastExpressionTerm = termIndex - 1; - if (firstExpressionTerm > lastExpressionTerm) + size_t endIndex = termIndex; + if (firstExpressionTerm >= endIndex) return; - if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) { - for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex) + if (!containsCapturingTerms(alternative, firstExpressionTerm, endIndex)) { + for (termIndex = terms.size() - 1; termIndex >= endIndex; --termIndex) terms.remove(termIndex); for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex) @@ -822,62 +1067,445 @@ public: } private: + bool isSafeToRecurse() const + { + if (!m_stackLimit) + return true; + int8_t* curr = reinterpret_cast<int8_t*>(&curr); + int8_t* limit = reinterpret_cast<int8_t*>(m_stackLimit); + return curr >= limit; + } + YarrPattern& m_pattern; PatternAlternative* m_alternative; CharacterClassConstructor m_characterClassConstructor; + void* m_stackLimit; bool m_invertCharacterClass; - bool m_invertParentheticalAssertion; + bool m_invertParentheticalAssertion { false }; }; -const char* YarrPattern::compile(const String& patternString) +ErrorCode YarrPattern::compile(const String& patternString, void* stackLimit) { - YarrPatternConstructor constructor(*this); + YarrPatternConstructor constructor(*this, stackLimit); - if (const char* error = parse(constructor, patternString)) - return error; + if (m_flags == InvalidFlags) + return ErrorCode::InvalidRegularExpressionFlags; + + { + ErrorCode error = parse(constructor, patternString, unicode()); + if (hasError(error)) + return error; + } // If the pattern contains illegal backreferences reset & reparse. 
// Quoting Netscape's "What's new in JavaScript 1.2", // "Note: if the number of left parentheses is less than the number specified // in \#, the \# is taken as an octal escape as described in the next row." if (containsIllegalBackReference()) { + if (unicode()) + return ErrorCode::InvalidBackreference; + unsigned numSubpatterns = m_numSubpatterns; constructor.reset(); -#if !ASSERT_DISABLED - const char* error = -#endif - parse(constructor, patternString, numSubpatterns); - - ASSERT(!error); + ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns); + ASSERT_UNUSED(error, !hasError(error)); ASSERT(numSubpatterns == m_numSubpatterns); } constructor.checkForTerminalParentheses(); constructor.optimizeDotStarWrappedExpressions(); constructor.optimizeBOL(); - - constructor.setupOffsets(); - return 0; + { + ErrorCode error = constructor.setupOffsets(); + if (hasError(error)) + return error; + } + + if (Options::dumpCompiledRegExpPatterns()) + dumpPattern(patternString); + + return ErrorCode::NoError; } -YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error) - : m_ignoreCase(ignoreCase) - , m_multiline(multiline) - , m_containsBackreferences(false) +YarrPattern::YarrPattern(const String& pattern, RegExpFlags flags, ErrorCode& error, void* stackLimit) + : m_containsBackreferences(false) , m_containsBOL(false) - , m_numSubpatterns(0) - , m_maxBackReference(0) - , newlineCached(0) - , digitsCached(0) - , spacesCached(0) - , wordcharCached(0) - , nondigitsCached(0) - , nonspacesCached(0) - , nonwordcharCached(0) + , m_containsUnsignedLengthPattern(false) + , m_hasCopiedParenSubexpressions(false) + , m_saveInitialStartValue(false) + , m_flags(flags) +{ + error = compile(pattern, stackLimit); +} + +void indentForNestingLevel(PrintStream& out, unsigned nestingDepth) +{ + out.print(" "); + for (; nestingDepth; --nestingDepth) + out.print(" "); +} + +void dumpUChar32(PrintStream& out, UChar32 c) +{ + if (c >= ' 
'&& c <= 0xff) + out.printf("'%c'", static_cast<char>(c)); + else + out.printf("0x%04x", c); +} + +void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* characterClass) +{ + if (characterClass == pattern->anyCharacterClass()) + out.print("<any character>"); + else if (characterClass == pattern->newlineCharacterClass()) + out.print("<newline>"); + else if (characterClass == pattern->digitsCharacterClass()) + out.print("<digits>"); + else if (characterClass == pattern->spacesCharacterClass()) + out.print("<whitespace>"); + else if (characterClass == pattern->wordcharCharacterClass()) + out.print("<word>"); + else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass()) + out.print("<unicode ignore case>"); + else if (characterClass == pattern->nondigitsCharacterClass()) + out.print("<non-digits>"); + else if (characterClass == pattern->nonspacesCharacterClass()) + out.print("<non-whitespace>"); + else if (characterClass == pattern->nonwordcharCharacterClass()) + out.print("<non-word>"); + else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass()) + out.print("<unicode non-ignore case>"); + else { + bool needMatchesRangesSeperator = false; + + auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) { + size_t matchesSize = matches.size(); + if (matchesSize) { + if (needMatchesRangesSeperator) + out.print(","); + needMatchesRangesSeperator = true; + + out.print(prefix, ":("); + for (size_t i = 0; i < matchesSize; ++i) { + if (i) + out.print(","); + dumpUChar32(out, matches[i]); + } + out.print(")"); + } + }; + + auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) { + size_t rangeSize = ranges.size(); + if (rangeSize) { + if (needMatchesRangesSeperator) + out.print(","); + needMatchesRangesSeperator = true; + + out.print(prefix, " ranges:("); + for (size_t i = 0; i < rangeSize; ++i) { + if (i) + out.print(","); + CharacterRange range = ranges[i]; + out.print("("); + 
dumpUChar32(out, range.begin); + out.print(".."); + dumpUChar32(out, range.end); + out.print(")"); + } + out.print(")"); + } + }; + + out.print("["); + dumpMatches("ASCII", characterClass->m_matches); + dumpRanges("ASCII", characterClass->m_ranges); + dumpMatches("Unicode", characterClass->m_matchesUnicode); + dumpRanges("Unicode", characterClass->m_rangesUnicode); + out.print("]"); + } +} + +void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth) +{ + out.print("minimum size: ", m_minimumSize); + if (m_hasFixedSize) + out.print(",fixed size"); + if (m_onceThrough) + out.print(",once through"); + if (m_startsWithBOL) + out.print(",starts with ^"); + if (m_containsBOL) + out.print(",contains ^"); + out.print("\n"); + + for (size_t i = 0; i < m_terms.size(); ++i) + m_terms[i].dump(out, thisPattern, nestingDepth); +} + +void PatternTerm::dumpQuantifier(PrintStream& out) +{ + if (quantityType == QuantifierFixedCount && quantityMinCount == 1 && quantityMaxCount == 1) + return; + out.print(" {", quantityMinCount.unsafeGet()); + if (quantityMinCount != quantityMaxCount) { + if (quantityMaxCount == UINT_MAX) + out.print(",..."); + else + out.print(",", quantityMaxCount.unsafeGet()); + } + out.print("}"); + if (quantityType == QuantifierGreedy) + out.print(" greedy"); + else if (quantityType == QuantifierNonGreedy) + out.print(" non-greedy"); +} + +void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth) +{ + indentForNestingLevel(out, nestingDepth); + + if (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion) { + if (invert()) + out.print("not "); + } + + switch (type) { + case TypeAssertionBOL: + out.println("BOL"); + break; + case TypeAssertionEOL: + out.println("EOL"); + break; + case TypeAssertionWordBoundary: + out.println("word boundary"); + break; + case TypePatternCharacter: + out.printf("character "); + out.printf("inputPosition %u ", inputPosition); + if 
(thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) { + dumpUChar32(out, toASCIIUpper(patternCharacter)); + out.print("/"); + dumpUChar32(out, toASCIILower(patternCharacter)); + } else + dumpUChar32(out, patternCharacter); + dumpQuantifier(out); + if (quantityType != QuantifierFixedCount) + out.print(",frame location ", frameLocation); + out.println(); + break; + case TypeCharacterClass: + out.print("character class "); + if (characterClass->m_anyCharacter) + out.print("<any character>"); + else if (characterClass == thisPattern->newlineCharacterClass()) + out.print("<newline>"); + else if (characterClass == thisPattern->digitsCharacterClass()) + out.print("<digits>"); + else if (characterClass == thisPattern->spacesCharacterClass()) + out.print("<whitespace>"); + else if (characterClass == thisPattern->wordcharCharacterClass()) + out.print("<word>"); + else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass()) + out.print("<unicode ignore case>"); + else if (characterClass == thisPattern->nondigitsCharacterClass()) + out.print("<non-digits>"); + else if (characterClass == thisPattern->nonspacesCharacterClass()) + out.print("<non-whitespace>"); + else if (characterClass == thisPattern->nonwordcharCharacterClass()) + out.print("<non-word>"); + else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass()) + out.print("<unicode non-ignore case>"); + else { + bool needMatchesRangesSeperator = false; + + auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) { + size_t matchesSize = matches.size(); + if (matchesSize) { + if (needMatchesRangesSeperator) + out.print(","); + needMatchesRangesSeperator = true; + + out.print(prefix, ":("); + for (size_t i = 0; i < matchesSize; ++i) { + if (i) + out.print(","); + dumpUChar32(out, matches[i]); + } + out.print(")"); + } + }; + + auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) { + size_t rangeSize = ranges.size(); + if (rangeSize) { 
+ if (needMatchesRangesSeperator) + out.print(","); + needMatchesRangesSeperator = true; + + out.print(prefix, " ranges:("); + for (size_t i = 0; i < rangeSize; ++i) { + if (i) + out.print(","); + CharacterRange range = ranges[i]; + out.print("("); + dumpUChar32(out, range.begin); + out.print(".."); + dumpUChar32(out, range.end); + out.print(")"); + } + out.print(")"); + } + }; + + out.print("["); + dumpMatches("ASCII", characterClass->m_matches); + dumpRanges("ASCII", characterClass->m_ranges); + dumpMatches("Unicode", characterClass->m_matchesUnicode); + dumpRanges("Unicode", characterClass->m_rangesUnicode); + out.print("]"); + } + dumpQuantifier(out); + if (quantityType != QuantifierFixedCount || thisPattern->unicode()) + out.print(",frame location ", frameLocation); + out.println(); + break; + case TypeBackReference: + out.print("back reference to subpattern #", backReferenceSubpatternId); + out.println(",frame location ", frameLocation); + break; + case TypeForwardReference: + out.println("forward reference"); + break; + case TypeParenthesesSubpattern: + if (m_capture) + out.print("captured "); + else + out.print("non-captured "); + + FALLTHROUGH; + case TypeParentheticalAssertion: + if (m_invert) + out.print("inverted "); + + if (type == TypeParenthesesSubpattern) + out.print("subpattern"); + else if (type == TypeParentheticalAssertion) + out.print("assertion"); + + if (m_capture) + out.print(" #", parentheses.subpatternId); + + dumpQuantifier(out); + + if (parentheses.isCopy) + out.print(",copy"); + + if (parentheses.isTerminal) + out.print(",terminal"); + + out.println(",frame location ", frameLocation); + + if (parentheses.disjunction->m_alternatives.size() > 1) { + indentForNestingLevel(out, nestingDepth + 1); + unsigned alternativeFrameLocation = frameLocation; + if (quantityMaxCount == 1 && !parentheses.isCopy) + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; + else if (parentheses.isTerminal) + alternativeFrameLocation += 
YarrStackSpaceForBackTrackInfoParenthesesTerminal; + else + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParentheses; + out.println("alternative list,frame location ", alternativeFrameLocation); + } + + parentheses.disjunction->dump(out, thisPattern, nestingDepth + 1); + break; + case TypeDotStarEnclosure: + out.println(".* enclosure,frame location ", thisPattern->m_initialStartValueFrameLocation); + break; + } +} + +void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth = 0) +{ + unsigned alternativeCount = m_alternatives.size(); + for (unsigned i = 0; i < alternativeCount; ++i) { + indentForNestingLevel(out, nestingDepth); + if (alternativeCount > 1) + out.print("alternative #", i, ": "); + m_alternatives[i].get()->dump(out, thisPattern, nestingDepth + (alternativeCount > 1)); + } +} + +void YarrPattern::dumpPattern(const String& patternString) +{ + dumpPattern(WTF::dataFile(), patternString); +} + +void YarrPattern::dumpPattern(PrintStream& out, const String& patternString) +{ + out.print("RegExp pattern for /"); + out.print(patternString); + out.print("/"); + if (global()) + out.print("g"); + if (ignoreCase()) + out.print("i"); + if (multiline()) + out.print("m"); + if (unicode()) + out.print("u"); + if (sticky()) + out.print("y"); + if (m_flags != NoFlags) { + bool printSeperator = false; + out.print(" ("); + if (global()) { + out.print("global"); + printSeperator = true; + } + if (ignoreCase()) { + if (printSeperator) + out.print("|"); + out.print("ignore case"); + printSeperator = true; + } + if (multiline()) { + if (printSeperator) + out.print("|"); + out.print("multiline"); + printSeperator = true; + } + if (unicode()) { + if (printSeperator) + out.print("|"); + out.print("unicode"); + printSeperator = true; + } + if (sticky()) { + if (printSeperator) + out.print("|"); + out.print("sticky"); + printSeperator = true; + } + out.print(")"); + } + out.print(":\n"); + if (m_body->m_callFrameSize) + 
out.print(" callframe size: ", m_body->m_callFrameSize, "\n"); + m_body->dump(out, this); +} + +std::unique_ptr<CharacterClass> anycharCreate() { - *error = compile(pattern); + auto characterClass = std::make_unique<CharacterClass>(); + characterClass->m_ranges.append(CharacterRange(0x00, 0x7f)); + characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x10ffff)); + characterClass->m_hasNonBMPCharacters = true; + characterClass->m_anyCharacter = true; + return characterClass; } -} } +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrPattern.h b/src/3rdparty/masm/yarr/YarrPattern.h index e7d187c2b3..59decbac46 100644 --- a/src/3rdparty/masm/yarr/YarrPattern.h +++ b/src/3rdparty/masm/yarr/YarrPattern.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved. * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without @@ -24,26 +24,27 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef YarrPattern_h -#define YarrPattern_h +#pragma once +#include "RegExpKey.h" +#include "YarrErrorCode.h" +#include "YarrUnicodeProperties.h" #include <wtf/CheckedArithmetic.h> -#include <wtf/OwnPtr.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/RefCounted.h> +#include <wtf/HashMap.h> +#include <wtf/PrintStream.h> #include <wtf/Vector.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { +struct YarrPattern; struct PatternDisjunction; struct CharacterRange { - UChar begin; - UChar end; + UChar32 begin { 0 }; + UChar32 end { 0x10ffff }; - CharacterRange(UChar begin, UChar end) + CharacterRange(UChar32 begin, UChar32 end) : begin(begin) , end(end) { @@ -58,20 +59,38 @@ public: // specified matches and ranges) CharacterClass() : m_table(0) + , m_hasNonBMPCharacters(false) + , m_anyCharacter(false) { } CharacterClass(const char* table, bool inverted) : m_table(table) , m_tableInverted(inverted) + , m_hasNonBMPCharacters(false) + , m_anyCharacter(false) { } - Vector<UChar> m_matches; + CharacterClass(std::initializer_list<UChar32> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<UChar32> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode) + : m_matches(matches) + , m_ranges(ranges) + , m_matchesUnicode(matchesUnicode) + , m_rangesUnicode(rangesUnicode) + , m_table(0) + , m_tableInverted(false) + , m_hasNonBMPCharacters(false) + , m_anyCharacter(false) + { + } + + Vector<UChar32> m_matches; Vector<CharacterRange> m_ranges; - Vector<UChar> m_matchesUnicode; + Vector<UChar32> m_matchesUnicode; Vector<CharacterRange> m_rangesUnicode; const char* m_table; - bool m_tableInverted; + bool m_tableInverted : 1; + bool m_hasNonBMPCharacters : 1; + bool m_anyCharacter : 1; }; enum QuantifierType { @@ -96,7 +115,7 @@ struct PatternTerm { bool m_capture :1; bool m_invert :1; union { - UChar patternCharacter; + UChar32 patternCharacter; CharacterClass* characterClass; unsigned 
backReferenceSubpatternId; struct { @@ -112,18 +131,19 @@ struct PatternTerm { } anchors; }; QuantifierType quantityType; - Checked<unsigned> quantityCount; - int inputPosition; + Checked<unsigned> quantityMinCount; + Checked<unsigned> quantityMaxCount; + unsigned inputPosition; unsigned frameLocation; - PatternTerm(UChar ch) + PatternTerm(UChar32 ch) : type(PatternTerm::TypePatternCharacter) , m_capture(false) , m_invert(false) { patternCharacter = ch; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(CharacterClass* charClass, bool invert) @@ -133,7 +153,7 @@ struct PatternTerm { { characterClass = charClass; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false) @@ -146,7 +166,7 @@ struct PatternTerm { parentheses.isCopy = false; parentheses.isTerminal = false; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(Type type, bool invert = false) @@ -155,7 +175,7 @@ struct PatternTerm { , m_invert(invert) { quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(unsigned spatternId) @@ -165,7 +185,7 @@ struct PatternTerm { { backReferenceSubpatternId = spatternId; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(bool bolAnchor, bool eolAnchor) @@ -176,7 +196,7 @@ struct PatternTerm { anchors.bolAnchor = bolAnchor; anchors.eolAnchor = eolAnchor; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } static PatternTerm ForwardReference() @@ -208,12 +228,32 @@ struct PatternTerm { { return m_capture; } - + + bool containsAnyCaptures() + { + ASSERT(this->type == TypeParenthesesSubpattern); + return 
parentheses.lastSubpatternId >= parentheses.subpatternId; + } + void quantify(unsigned count, QuantifierType type) { - quantityCount = count; + quantityMinCount = 0; + quantityMaxCount = count; quantityType = type; } + + void quantify(unsigned minCount, unsigned maxCount, QuantifierType type) + { + // Currently only Parentheses can specify a non-zero min with a different max. + ASSERT(this->type == TypeParenthesesSubpattern || !minCount || minCount == maxCount); + ASSERT(minCount <= maxCount); + quantityMinCount = minCount; + quantityMaxCount = maxCount; + quantityType = type; + } + + void dumpQuantifier(PrintStream&); + void dump(PrintStream&, YarrPattern*, unsigned); }; struct PatternAlternative { @@ -250,6 +290,8 @@ public: return m_onceThrough; } + void dump(PrintStream&, YarrPattern*, unsigned); + Vector<PatternTerm> m_terms; PatternDisjunction* m_parent; unsigned m_minimumSize; @@ -270,12 +312,13 @@ public: PatternAlternative* addNewAlternative() { - PatternAlternative* alternative = new PatternAlternative(this); - m_alternatives.append(adoptPtr(alternative)); - return alternative; + m_alternatives.append(std::make_unique<PatternAlternative>(this)); + return static_cast<PatternAlternative*>(m_alternatives.last().get()); } - Vector<OwnPtr<PatternAlternative> > m_alternatives; + void dump(PrintStream&, YarrPattern*, unsigned); + + Vector<std::unique_ptr<PatternAlternative>> m_alternatives; PatternAlternative* m_parent; unsigned m_minimumSize; unsigned m_callFrameSize; @@ -286,13 +329,17 @@ public: // (please to be calling newlineCharacterClass() et al on your // friendly neighborhood YarrPattern instance to get nicely // cached copies). 
-CharacterClass* newlineCreate(); -CharacterClass* digitsCreate(); -CharacterClass* spacesCreate(); -CharacterClass* wordcharCreate(); -CharacterClass* nondigitsCreate(); -CharacterClass* nonspacesCreate(); -CharacterClass* nonwordcharCreate(); + +std::unique_ptr<CharacterClass> anycharCreate(); +std::unique_ptr<CharacterClass> newlineCreate(); +std::unique_ptr<CharacterClass> digitsCreate(); +std::unique_ptr<CharacterClass> spacesCreate(); +std::unique_ptr<CharacterClass> wordcharCreate(); +std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate(); +std::unique_ptr<CharacterClass> nondigitsCreate(); +std::unique_ptr<CharacterClass> nonspacesCreate(); +std::unique_ptr<CharacterClass> nonwordcharCreate(); +std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate(); struct TermChain { TermChain(PatternTerm term) @@ -303,27 +350,37 @@ struct TermChain { Vector<TermChain> hotTerms; }; + struct YarrPattern { - JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error); + JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, ErrorCode&, void* stackLimit = nullptr); void reset() { m_numSubpatterns = 0; m_maxBackReference = 0; + m_initialStartValueFrameLocation = 0; m_containsBackreferences = false; m_containsBOL = false; - - newlineCached = 0; - digitsCached = 0; - spacesCached = 0; - wordcharCached = 0; - nondigitsCached = 0; - nonspacesCached = 0; - nonwordcharCached = 0; + m_containsUnsignedLengthPattern = false; + m_hasCopiedParenSubexpressions = false; + m_saveInitialStartValue = false; + + anycharCached = nullptr; + newlineCached = nullptr; + digitsCached = nullptr; + spacesCached = nullptr; + wordcharCached = nullptr; + wordUnicodeIgnoreCaseCharCached = nullptr; + nondigitsCached = nullptr; + nonspacesCached = nullptr; + nonwordcharCached = nullptr; + nonwordUnicodeIgnoreCasecharCached = nullptr; + unicodePropertiesCached.clear(); m_disjunctions.clear(); m_userCharacterClasses.clear(); + 
m_captureGroupNames.shrink(0); } bool containsIllegalBackReference() @@ -331,71 +388,212 @@ struct YarrPattern { return m_maxBackReference > m_numSubpatterns; } + bool containsUnsignedLengthPattern() + { + return m_containsUnsignedLengthPattern; + } + + CharacterClass* anyCharacterClass() + { + if (!anycharCached) { + m_userCharacterClasses.append(anycharCreate()); + anycharCached = m_userCharacterClasses.last().get(); + } + return anycharCached; + } CharacterClass* newlineCharacterClass() { - if (!newlineCached) - m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate())); + if (!newlineCached) { + m_userCharacterClasses.append(newlineCreate()); + newlineCached = m_userCharacterClasses.last().get(); + } return newlineCached; } CharacterClass* digitsCharacterClass() { - if (!digitsCached) - m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate())); + if (!digitsCached) { + m_userCharacterClasses.append(digitsCreate()); + digitsCached = m_userCharacterClasses.last().get(); + } return digitsCached; } CharacterClass* spacesCharacterClass() { - if (!spacesCached) - m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate())); + if (!spacesCached) { + m_userCharacterClasses.append(spacesCreate()); + spacesCached = m_userCharacterClasses.last().get(); + } return spacesCached; } CharacterClass* wordcharCharacterClass() { - if (!wordcharCached) - m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate())); + if (!wordcharCached) { + m_userCharacterClasses.append(wordcharCreate()); + wordcharCached = m_userCharacterClasses.last().get(); + } return wordcharCached; } + CharacterClass* wordUnicodeIgnoreCaseCharCharacterClass() + { + if (!wordUnicodeIgnoreCaseCharCached) { + m_userCharacterClasses.append(wordUnicodeIgnoreCaseCharCreate()); + wordUnicodeIgnoreCaseCharCached = m_userCharacterClasses.last().get(); + } + return wordUnicodeIgnoreCaseCharCached; + } CharacterClass* nondigitsCharacterClass() { - if (!nondigitsCached) 
- m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate())); + if (!nondigitsCached) { + m_userCharacterClasses.append(nondigitsCreate()); + nondigitsCached = m_userCharacterClasses.last().get(); + } return nondigitsCached; } CharacterClass* nonspacesCharacterClass() { - if (!nonspacesCached) - m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate())); + if (!nonspacesCached) { + m_userCharacterClasses.append(nonspacesCreate()); + nonspacesCached = m_userCharacterClasses.last().get(); + } return nonspacesCached; } CharacterClass* nonwordcharCharacterClass() { - if (!nonwordcharCached) - m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate())); + if (!nonwordcharCached) { + m_userCharacterClasses.append(nonwordcharCreate()); + nonwordcharCached = m_userCharacterClasses.last().get(); + } return nonwordcharCached; } + CharacterClass* nonwordUnicodeIgnoreCaseCharCharacterClass() + { + if (!nonwordUnicodeIgnoreCasecharCached) { + m_userCharacterClasses.append(nonwordUnicodeIgnoreCaseCharCreate()); + nonwordUnicodeIgnoreCasecharCached = m_userCharacterClasses.last().get(); + } + return nonwordUnicodeIgnoreCasecharCached; + } + CharacterClass* unicodeCharacterClassFor(BuiltInCharacterClassID unicodeClassID) + { + ASSERT(unicodeClassID >= BuiltInCharacterClassID::BaseUnicodePropertyID); + + unsigned classID = static_cast<unsigned>(unicodeClassID); + + if (unicodePropertiesCached.find(classID) == unicodePropertiesCached.end()) { + m_userCharacterClasses.append(createUnicodeCharacterClassFor(unicodeClassID)); + CharacterClass* result = m_userCharacterClasses.last().get(); + unicodePropertiesCached.add(classID, result); + return result; + } + + return unicodePropertiesCached.get(classID); + } + + void dumpPattern(const String& pattern); + void dumpPattern(PrintStream& out, const String& pattern); + + bool global() const { return m_flags & FlagGlobal; } + bool ignoreCase() const { return m_flags & FlagIgnoreCase; } 
+ bool multiline() const { return m_flags & FlagMultiline; } + bool sticky() const { return m_flags & FlagSticky; } + bool unicode() const { return m_flags & FlagUnicode; } + bool dotAll() const { return m_flags & FlagDotAll; } - bool m_ignoreCase : 1; - bool m_multiline : 1; bool m_containsBackreferences : 1; bool m_containsBOL : 1; - unsigned m_numSubpatterns; - unsigned m_maxBackReference; + bool m_containsUnsignedLengthPattern : 1; + bool m_hasCopiedParenSubexpressions : 1; + bool m_saveInitialStartValue : 1; + RegExpFlags m_flags; + unsigned m_numSubpatterns { 0 }; + unsigned m_maxBackReference { 0 }; + unsigned m_initialStartValueFrameLocation { 0 }; PatternDisjunction* m_body; - Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions; - Vector<OwnPtr<CharacterClass> > m_userCharacterClasses; + Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; + Vector<String> m_captureGroupNames; + HashMap<String, unsigned> m_namedGroupToParenIndex; private: - const char* compile(const String& patternString); - - CharacterClass* newlineCached; - CharacterClass* digitsCached; - CharacterClass* spacesCached; - CharacterClass* wordcharCached; - CharacterClass* nondigitsCached; - CharacterClass* nonspacesCached; - CharacterClass* nonwordcharCached; + ErrorCode compile(const String& patternString, void* stackLimit); + + CharacterClass* anycharCached { nullptr }; + CharacterClass* newlineCached { nullptr }; + CharacterClass* digitsCached { nullptr }; + CharacterClass* spacesCached { nullptr }; + CharacterClass* wordcharCached { nullptr }; + CharacterClass* wordUnicodeIgnoreCaseCharCached { nullptr }; + CharacterClass* nondigitsCached { nullptr }; + CharacterClass* nonspacesCached { nullptr }; + CharacterClass* nonwordcharCached { nullptr }; + CharacterClass* nonwordUnicodeIgnoreCasecharCached { nullptr }; + HashMap<unsigned, CharacterClass*> unicodePropertiesCached; }; -} } // namespace JSC::Yarr + void 
indentForNestingLevel(PrintStream&, unsigned); + void dumpUChar32(PrintStream&, UChar32); + void dumpCharacterClass(PrintStream&, YarrPattern*, CharacterClass*); + + struct BackTrackInfoPatternCharacter { + uintptr_t begin; // Only needed for unicode patterns + uintptr_t matchAmount; + + static unsigned beginIndex() { return offsetof(BackTrackInfoPatternCharacter, begin) / sizeof(uintptr_t); } + static unsigned matchAmountIndex() { return offsetof(BackTrackInfoPatternCharacter, matchAmount) / sizeof(uintptr_t); } + }; -#endif // YarrPattern_h + struct BackTrackInfoCharacterClass { + uintptr_t begin; // Only needed for unicode patterns + uintptr_t matchAmount; + + static unsigned beginIndex() { return offsetof(BackTrackInfoCharacterClass, begin) / sizeof(uintptr_t); } + static unsigned matchAmountIndex() { return offsetof(BackTrackInfoCharacterClass, matchAmount) / sizeof(uintptr_t); } + }; + + struct BackTrackInfoBackReference { + uintptr_t begin; // Not really needed for greedy quantifiers. + uintptr_t matchAmount; // Not really needed for fixed quantifiers. 
+ + unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); } + unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); } + }; + + struct BackTrackInfoAlternative { + union { + uintptr_t offset; + }; + }; + + struct BackTrackInfoParentheticalAssertion { + uintptr_t begin; + + static unsigned beginIndex() { return offsetof(BackTrackInfoParentheticalAssertion, begin) / sizeof(uintptr_t); } + }; + + struct BackTrackInfoParenthesesOnce { + uintptr_t begin; + uintptr_t returnAddress; + + static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t); } + static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t); } + }; + + struct BackTrackInfoParenthesesTerminal { + uintptr_t begin; + + static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t); } + }; + + struct BackTrackInfoParentheses { + uintptr_t begin; + uintptr_t returnAddress; + uintptr_t matchAmount; + uintptr_t parenContextHead; + + static unsigned beginIndex() { return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t); } + static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t); } + static unsigned matchAmountIndex() { return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t); } + static unsigned parenContextHeadIndex() { return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t); } + }; + +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp index aa98c4a354..9f05f22852 100644 --- a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp +++ b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Apple Inc. All rights reserved. 
+ * Copyright (C) 2011, 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,6 +27,8 @@ #include "YarrSyntaxChecker.h" #include "YarrParser.h" +#include <wtf/Optional.h> +#include <wtf/text/WTFString.h> namespace JSC { namespace Yarr { @@ -35,25 +37,26 @@ public: void assertionBOL() {} void assertionEOL() {} void assertionWordBoundary(bool) {} - void atomPatternCharacter(UChar) {} + void atomPatternCharacter(UChar32) {} void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {} void atomCharacterClassBegin(bool = false) {} void atomCharacterClassAtom(UChar) {} void atomCharacterClassRange(UChar, UChar) {} void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {} void atomCharacterClassEnd() {} - void atomParenthesesSubpatternBegin(bool = true) {} + void atomParenthesesSubpatternBegin(bool = true, std::optional<String> = std::nullopt) {} void atomParentheticalAssertionBegin(bool = false) {} void atomParenthesesEnd() {} void atomBackReference(unsigned) {} + void atomNamedBackReference(String) {} void quantifyAtom(unsigned, unsigned, bool) {} void disjunction() {} }; -const char* checkSyntax(const String& pattern) +ErrorCode checkSyntax(const String& pattern, const String& flags) { SyntaxChecker syntaxChecker; - return parse(syntaxChecker, pattern); + return parse(syntaxChecker, pattern, flags.contains('u')); } -}} // JSC::YARR +}} // JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrSyntaxChecker.h b/src/3rdparty/masm/yarr/YarrSyntaxChecker.h index 104ced3ab4..86daf38bcb 100644 --- a/src/3rdparty/masm/yarr/YarrSyntaxChecker.h +++ b/src/3rdparty/masm/yarr/YarrSyntaxChecker.h @@ -23,16 +23,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef YarrSyntaxChecker_h -#define YarrSyntaxChecker_h +#pragma once +#include "YarrErrorCode.h" #include <wtf/text/WTFString.h> namespace JSC { namespace Yarr { -const char* checkSyntax(const String& pattern); - -}} // JSC::YARR - -#endif // YarrSyntaxChecker_h +ErrorCode checkSyntax(const String& pattern, const String& flags); +}} // JSC::Yarr diff --git a/src/3rdparty/masm/assembler/MacroAssemblerSH4.cpp b/src/3rdparty/masm/yarr/YarrUnicodeProperties.h index 59de3ff48c..20f6739de3 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerSH4.cpp +++ b/src/3rdparty/masm/yarr/YarrUnicodeProperties.h @@ -1,6 +1,5 @@ /* - * Copyright (C) 2011 STMicroelectronics. All rights reserved. - * Copyright (C) 2008 Apple Inc. All rights reserved. + * Copyright (C) 2017 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -21,32 +20,22 @@ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ -#include "config.h" +#pragma once -#if ENABLE(ASSEMBLER) && CPU(SH4) +#include "Yarr.h" +#include <wtf/Optional.h> +#include <wtf/text/WTFString.h> -#include "MacroAssemblerSH4.h" +namespace JSC { namespace Yarr { -namespace JSC { +struct CharacterClass; -void MacroAssemblerSH4::linkCall(void* code, Call call, FunctionPtr function) -{ - SH4Assembler::linkCall(code, call.m_label, function.value()); -} +JS_EXPORT_PRIVATE std::optional<BuiltInCharacterClassID> unicodeMatchPropertyValue(WTF::String, WTF::String); +JS_EXPORT_PRIVATE std::optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String); -void MacroAssemblerSH4::repatchCall(CodeLocationCall call, CodeLocationLabel destination) -{ - SH4Assembler::relinkCall(call.dataLocation(), destination.executableAddress()); -} +std::unique_ptr<CharacterClass> createUnicodeCharacterClassFor(BuiltInCharacterClassID); -void MacroAssemblerSH4::repatchCall(CodeLocationCall call, FunctionPtr destination) -{ - SH4Assembler::relinkCall(call.dataLocation(), destination.executableAddress()); -} - -} // namespace JSC - -#endif // ENABLE(ASSEMBLER) +} } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/create_regex_tables b/src/3rdparty/masm/yarr/create_regex_tables index 7544b75cd9..4c3dbbe3fb 100644 --- a/src/3rdparty/masm/create_regex_tables +++ b/src/3rdparty/masm/yarr/create_regex_tables @@ -1,4 +1,6 @@ -# Copyright (C) 2010, 2013 Apple Inc. All rights reserved. +#!/usr/bin/env python + +# Copyright (C) 2010, 2013-2017 Apple Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -21,16 +23,19 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os import sys types = { "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]}, - "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]}, + "wordUnicodeIgnoreCaseChar": { "UseTable" : False, "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z'), 0x017f, 0x212a]}, + "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]}, + "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordUnicodeIgnoreCaseChar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x017e), (0x0180, 0x2129), (0x212b, 0x10ffff)]}, "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]}, "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]}, - "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]}, + "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0x10ffff)]}, "digits": { "UseTable" : False, "data": [('0', '9')]}, - "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] } + "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)] } } entriesPerLine = 50 arrays = ""; 
@@ -86,15 +91,16 @@ for name, classes in types.items(): # Generate createFunction: function = ""; - function += ("CharacterClass* %sCreate()\n" % name) + function += ("std::unique_ptr<CharacterClass> %sCreate()\n" % name) function += ("{\n") if emitTables and classes["UseTable"]: if "Inverse" in classes: - function += (" CharacterClass* characterClass = new CharacterClass(_%sData, true);\n" % (classes["Inverse"])) + function += (" auto characterClass = std::make_unique<CharacterClass>(_%sData, true);\n" % (classes["Inverse"])) else: - function += (" CharacterClass* characterClass = new CharacterClass(_%sData, false);\n" % (name)) + function += (" auto characterClass = std::make_unique<CharacterClass>(_%sData, false);\n" % (name)) else: - function += (" CharacterClass* characterClass = new CharacterClass;\n") + function += (" auto characterClass = std::make_unique<CharacterClass>();\n") + hasNonBMPCharacters = False for (min, max) in ranges: if (min == max): if (min > 127): @@ -106,12 +112,19 @@ for name, classes in types.items(): function += (" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max)) else: function += (" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max)) + if max >= 0x10000: + hasNonBMPCharacters = True + function += (" characterClass->m_hasNonBMPCharacters = %s;\n" % ("true" if hasNonBMPCharacters else "false")) function += (" return characterClass;\n") function += ("}\n\n") functions += function if (len(sys.argv) > 1): - f = open(sys.argv[-1], "w") + path = sys.argv[-1] + dirname = os.path.dirname(path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + f = open(path, "w") f.write(arrays) f.write(functions) f.close() diff --git a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode new file mode 100644 index 0000000000..a103bcdf16 --- /dev/null +++ b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode @@ -0,0 +1,204 
@@ +#! /usr/bin/env python + +# Copyright (C) 2016 Apple Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This tool processes the Unicode Character Database file CaseFolding.txt to create +# canonicalization table as described in ECMAScript 6 standard in section +# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2. + +import optparse +import os +import re +import sys +from sets import Set + +header = """/* +* Copyright (C) 2016 Apple Inc. All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. 
Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* +* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// DO NO EDIT! 
- This file was generated by generateYarrCanonicalizeUnicode + +#include "config.h" +#include "YarrCanonicalize.h" + +namespace JSC { namespace Yarr { + +""" + +footer = """} } // JSC::Yarr +""" + +MaxUnicode = 0x10ffff +commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE) + +def openOrExit(path, mode): + try: + dirname = os.path.dirname(path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + return open(path, mode) + except IOError as e: + print "I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror) + exit(1) + +class Canonicalize: + def __init__(self): + self.canonicalGroups = {}; + + def addMapping(self, code, mapping): + if mapping not in self.canonicalGroups: + self.canonicalGroups[mapping] = [] + self.canonicalGroups[mapping].append(code) + + def readCaseFolding(self, file): + codesSeen = Set() + for line in file: + line = line.split('#', 1)[0] + line = line.rstrip() + if (not len(line)): + continue + + fields = commonAndSimpleLinesRE.match(line) + if (not fields): + continue + + code = int(fields.group('code'), 16) + mapping = int(fields.group('mapping'), 16) + + codesSeen.add(code) + self.addMapping(code, mapping) + + for i in range(MaxUnicode + 1): + if i in codesSeen: + continue; + + self.addMapping(i, i) + + def createTables(self, file): + typeInfo = [""] * (MaxUnicode + 1) + characterSets = [] + + for mapping in sorted(self.canonicalGroups.keys()): + characters = self.canonicalGroups[mapping] + if len(characters) == 1: + typeInfo[characters[0]] = "CanonicalizeUnique:0" + else: + characters.sort() + if len(characters) > 2: + for ch in characters: + typeInfo[ch] = "CanonicalizeSet:%d" % len(characterSets) + characterSets.append(characters) + else: + low = characters[0] + high = characters[1] + delta = high - low + if delta == 1: + type = "CanonicalizeAlternatingUnaligned:0" if low & 1 else "CanonicalizeAlternatingAligned:0" + typeInfo[low] = type + typeInfo[high] = type + 
else: + typeInfo[low] = "CanonicalizeRangeLo:%d" % delta + typeInfo[high] = "CanonicalizeRangeHi:%d" % delta + + rangeInfo = [] + end = 0 + while end <= MaxUnicode: + begin = end + type = typeInfo[end] + while end < MaxUnicode and typeInfo[end + 1] == type: + end = end + 1 + rangeInfo.append({"begin": begin, "end": end, "type": type}) + end = end + 1 + + for i in range(len(characterSets)): + characters = "" + set = characterSets[i] + for ch in set: + characters = characters + "0x{character:04x}, ".format(character=ch) + file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=i, characters=characters)) + + file.write("\n") + file.write("static const size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets))) + file.write("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n") + + for i in range(len(characterSets)): + file.write(" unicodeCharacterSet{setNumber:d},\n".format(setNumber=i)) + + file.write("};\n") + file.write("\n") + file.write("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo))) + file.write("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n") + + for info in rangeInfo: + typeAndValue = info["type"].split(":") + file.write(" {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=info["begin"], end=info["end"], value=int(typeAndValue[1]), type=typeAndValue[0])) + + file.write("};\n") + file.write("\n") + + +if __name__ == "__main__": + parser = optparse.OptionParser(usage = "usage: %prog <CaseFolding.txt> <YarrCanonicalizeUnicode.h>") + (options, args) = parser.parse_args() + + if len(args) != 2: + parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>") + + caseFoldingTxtPath = args[0] + canonicalizeHPath = args[1] + caseFoldingTxtFile = openOrExit(caseFoldingTxtPath, "r") + canonicalizeHFile = openOrExit(canonicalizeHPath, "wb") + + canonicalize = 
Canonicalize() + canonicalize.readCaseFolding(caseFoldingTxtFile) + + canonicalizeHFile.write(header); + canonicalize.createTables(canonicalizeHFile) + canonicalizeHFile.write(footer); + + caseFoldingTxtFile.close() + canonicalizeHFile.close() + + exit(0) diff --git a/src/3rdparty/masm/yarr/yarr.pri b/src/3rdparty/masm/yarr/yarr.pri index 7e9b4d3f3b..c8e30990be 100644 --- a/src/3rdparty/masm/yarr/yarr.pri +++ b/src/3rdparty/masm/yarr/yarr.pri @@ -8,5 +8,8 @@ SOURCES += \ $$PWD/YarrInterpreter.cpp \ $$PWD/YarrPattern.cpp \ $$PWD/YarrSyntaxChecker.cpp \ - $$PWD/YarrCanonicalizeUCS2.cpp + $$PWD/YarrCanonicalizeUCS2.cpp \ + $$PWD/YarrCanonicalizeUnicode.cpp \ + $$PWD/YarrErrorCode.cpp \ + $$PWD/YarrUnicodeProperties.cpp |