diff options
Diffstat (limited to 'src/3rdparty/masm')
28 files changed, 786 insertions, 494 deletions
diff --git a/src/3rdparty/masm/assembler/ARM64Assembler.h b/src/3rdparty/masm/assembler/ARM64Assembler.h index a9166e83a2..ca6b33d39a 100644 --- a/src/3rdparty/masm/assembler/ARM64Assembler.h +++ b/src/3rdparty/masm/assembler/ARM64Assembler.h @@ -26,7 +26,7 @@ #ifndef ARM64Assembler_h #define ARM64Assembler_h -#if ENABLE(ASSEMBLER) && (CPU(ARM64) || defined(V4_BOOTSTRAP)) +#if ENABLE(ASSEMBLER) && CPU(ARM64) #include "AssemblerBuffer.h" #include "AbstractMacroAssembler.h" @@ -3021,10 +3021,7 @@ public: static void cacheFlush(void* code, size_t size) { -#if defined(V4_BOOTSTRAP) - UNUSED_PARAM(code) - UNUSED_PARAM(size) -#elif OS(IOS) +#if OS(IOS) sys_cache_control(kCacheFunctionPrepareForExecution, code, size); #elif OS(LINUX) size_t page = pageSize(); diff --git a/src/3rdparty/masm/assembler/ARMv7Assembler.h b/src/3rdparty/masm/assembler/ARMv7Assembler.h index d57e5a7c78..03cb9f42f8 100644 --- a/src/3rdparty/masm/assembler/ARMv7Assembler.h +++ b/src/3rdparty/masm/assembler/ARMv7Assembler.h @@ -27,7 +27,7 @@ #ifndef ARMAssembler_h #define ARMAssembler_h -#if ENABLE(ASSEMBLER) && (CPU(ARM_THUMB2) || defined(V4_BOOTSTRAP)) +#if ENABLE(ASSEMBLER) && CPU(ARM_THUMB2) #include "AssemblerBuffer.h" #include "MacroAssemblerCodeRef.h" @@ -40,6 +40,10 @@ #include <libkern/OSCacheControl.h> #endif +#if OS(RTEMS) +#include <rtems/rtems/cache.h> +#endif + namespace JSC { namespace ARMRegisters { @@ -2166,7 +2170,6 @@ public: linkJumpAbsolute(location, to); } -#if !defined(V4_BOOTSTRAP) static void linkCall(void* code, AssemblerLabel from, void* to) { ASSERT(!(reinterpret_cast<intptr_t>(code) & 1)); @@ -2175,14 +2178,12 @@ public: setPointer(reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset) - 1, to, false); } -#endif static void linkPointer(void* code, AssemblerLabel where, void* value) { setPointer(reinterpret_cast<char*>(code) + where.m_offset, value, false); } -#if !defined(V4_BOOTSTRAP) static void relinkJump(void* from, void* to) { 
ASSERT(!(reinterpret_cast<intptr_t>(from) & 1)); @@ -2205,7 +2206,6 @@ public: { return readPointer(reinterpret_cast<uint16_t*>(from) - 1); } -#endif static void repatchInt32(void* where, int32_t value) { @@ -2234,7 +2234,6 @@ public: cacheFlush(location, sizeof(uint16_t) * 2); } -#if !defined(V4_BOOTSTRAP) static void repatchPointer(void* where, void* value) { ASSERT(!(reinterpret_cast<intptr_t>(where) & 1)); @@ -2246,7 +2245,6 @@ public: { return reinterpret_cast<void*>(readInt32(where)); } -#endif static void replaceWithJump(void* instructionStart, void* to) { @@ -2321,7 +2319,7 @@ public: unsigned debugOffset() { return m_formatter.debugOffset(); } -#if OS(LINUX) && !defined(V4_BOOTSTRAP) +#if OS(LINUX) static inline void linuxPageFlush(uintptr_t begin, uintptr_t end) { asm volatile( @@ -2341,10 +2339,7 @@ public: static void cacheFlush(void* code, size_t size) { -#if defined(V4_BOOTSTRAP) - UNUSED_PARAM(code) - UNUSED_PARAM(size) -#elif OS(IOS) +#if OS(IOS) sys_cache_control(kCacheFunctionPrepareForExecution, code, size); #elif OS(LINUX) size_t page = pageSize(); @@ -2368,6 +2363,8 @@ public: #elif OS(QNX) #if !ENABLE(ASSEMBLER_WX_EXCLUSIVE) msync(code, size, MS_INVALIDATE_ICACHE); +#elif OS(RTEMS) + rtems_cache_flush_multiple_data_lines(code, size); #else UNUSED_PARAM(code); UNUSED_PARAM(size); @@ -2662,11 +2659,6 @@ private: static void linkBX(uint16_t* instruction, void* target) { -#if defined(V4_BOOTSTRAP) - UNUSED_PARAM(instruction); - UNUSED_PARAM(target); - RELEASE_ASSERT_NOT_REACHED(); -#else // FIMXE: this should be up in the MacroAssembler layer. 
:-( ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1)); ASSERT(!(reinterpret_cast<intptr_t>(target) & 1)); @@ -2679,7 +2671,6 @@ private: instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16); instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16); instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3); -#endif } void linkConditionalBX(Condition cond, uint16_t* instruction, void* target) @@ -2712,9 +2703,6 @@ private: instruction[-3] = OP_NOP_T2b; linkJumpT4(instruction, target); } else { -#if defined(V4_BOOTSTRAP) - RELEASE_ASSERT_NOT_REACHED(); -#else const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip; ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1)); ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16)); @@ -2723,7 +2711,6 @@ private: instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16); instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16); instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3); -#endif } } diff --git a/src/3rdparty/masm/assembler/AbstractMacroAssembler.h b/src/3rdparty/masm/assembler/AbstractMacroAssembler.h index d0c1c4613e..14644a4193 100644 --- a/src/3rdparty/masm/assembler/AbstractMacroAssembler.h +++ b/src/3rdparty/masm/assembler/AbstractMacroAssembler.h @@ -66,7 +66,7 @@ public: typedef MacroAssemblerCodePtr CodePtr; typedef MacroAssemblerCodeRef CodeRef; -#if !CPU(ARM_THUMB2) && !CPU(ARM64) && !defined(V4_BOOTSTRAP) +#if !CPU(ARM_THUMB2) && !CPU(ARM64) class Jump; #endif @@ -328,7 +328,7 @@ public: friend class AbstractMacroAssembler; friend struct DFG::OSRExit; -#if CPU(ARM_THUMB2) || CPU(ARM64) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) || CPU(ARM64) using Jump = typename AssemblerType::template Jump<Label>; friend Jump; #else @@ -461,7 +461,7 @@ public: AssemblerLabel m_label; }; -#if 
CPU(ARM_THUMB2) || CPU(ARM64) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) || CPU(ARM64) using Jump = typename AssemblerType::template Jump<Label>; friend Jump; #endif @@ -516,7 +516,7 @@ public: // into the code buffer - it is typically used to link the jump, setting the // relative offset such that when executed it will jump to the desired // destination. -#if !CPU(ARM_THUMB2) && !CPU(ARM64) && !defined(V4_BOOTSTRAP) +#if !CPU(ARM_THUMB2) && !CPU(ARM64) class Jump { template<class TemplateAssemblerType> friend class AbstractMacroAssembler; @@ -528,7 +528,7 @@ public: { } -#if CPU(ARM_THUMB2) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) // Fixme: this information should be stored in the instruction stream, not in the Jump object. Jump(AssemblerLabel jmp, ARMv7Assembler::JumpType type = ARMv7Assembler::JumpNoCondition, ARMv7Assembler::Condition condition = ARMv7Assembler::ConditionInvalid) : m_label(jmp) @@ -621,7 +621,7 @@ public: private: AssemblerLabel m_label; -#if CPU(ARM_THUMB2) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) ARMv7Assembler::JumpType m_type; ARMv7Assembler::Condition m_condition; #endif @@ -878,12 +878,10 @@ protected: AssemblerType::repatchPointer(dataLabelPtr.dataLocation(), value); } -#if !defined(V4_BOOTSTRAP) static void* readPointer(CodeLocationDataLabelPtr dataLabelPtr) { return AssemblerType::readPointer(dataLabelPtr.dataLocation()); } -#endif static void replaceWithLoad(CodeLocationConvertibleLoad label) { diff --git a/src/3rdparty/masm/assembler/LinkBuffer.h b/src/3rdparty/masm/assembler/LinkBuffer.h index a1bb046d43..8e9a3d9c7a 100644 --- a/src/3rdparty/masm/assembler/LinkBuffer.h +++ b/src/3rdparty/masm/assembler/LinkBuffer.h @@ -374,7 +374,7 @@ public: } }; -#if CPU(ARM_THUMB2) || CPU(ARM64) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) || CPU(ARM64) template <typename T> struct BranchCompactingExecutableOffsetCalculator { @@ -509,7 +509,7 @@ inline void BranchCompactingLinkBuffer<MacroAssembler>::linkCode(void* ownerUID, 
m_executableMemory->shrink(m_size); } -#if CPU(ARM_THUMB2) || defined(V4_BOOTSTRAP) +#if CPU(ARM_THUMB2) template <> class LinkBuffer<JSC::MacroAssembler<MacroAssemblerARMv7>> : public BranchCompactingLinkBuffer<JSC::MacroAssembler<MacroAssemblerARMv7>> { @@ -520,7 +520,7 @@ public: }; #endif -#if CPU(ARM64) || defined(V4_BOOTSTRAP) +#if CPU(ARM64) template <> class LinkBuffer<JSC::MacroAssembler<MacroAssemblerARM64>> : public BranchCompactingLinkBuffer<JSC::MacroAssembler<MacroAssemblerARM64>> { diff --git a/src/3rdparty/masm/assembler/MacroAssembler.h b/src/3rdparty/masm/assembler/MacroAssembler.h index b442a81bd0..aada47303f 100644 --- a/src/3rdparty/masm/assembler/MacroAssembler.h +++ b/src/3rdparty/masm/assembler/MacroAssembler.h @@ -97,10 +97,7 @@ public: using MacroAssemblerBase::load32; -#if defined(V4_BOOTSTRAP) - using MacroAssemblerBase::loadPtr; - using MacroAssemblerBase::storePtr; -#elif CPU(X86_64) || CPU(ARM64) +#if CPU(X86_64) || CPU(ARM64) using MacroAssemblerBase::add64; using MacroAssemblerBase::sub64; using MacroAssemblerBase::xor64; @@ -214,14 +211,12 @@ public: store32(value, addressForPoke(index)); } -#if !defined(V4_BOOTSTRAP) void poke(TrustedImmPtr imm, int index = 0) { storePtr(imm, addressForPoke(index)); } -#endif -#if (CPU(X86_64) || CPU(ARM64)) && !defined(V4_BOOTSTRAP) +#if CPU(X86_64) || CPU(ARM64) void peek64(RegisterID dest, int index = 0) { load64(Address(MacroAssemblerBase::stackPointerRegister, (index * sizeof(void*))), dest); @@ -352,7 +347,6 @@ public: return !(this->random() & (BlindingModulus - 1)); } -#if !defined(V4_BOOTSTRAP) // Ptr methods // On 32-bit platforms (i.e. x86), these methods directly map onto their 32-bit equivalents. // FIXME: should this use a test for 32-bitness instead of this specific exception? 
@@ -877,7 +871,7 @@ public: { return branchSub64(cond, src1, src2, dest); } -#endif // !defined(V4_BOOTSTRAP) +#endif // !CPU(X86_64) && !CPU(ARM64) #if ENABLE(JIT_CONSTANT_BLINDING) using MacroAssemblerBase::and64; @@ -1101,8 +1095,6 @@ public: #endif -#endif // !CPU(X86_64) - #if ENABLE(JIT_CONSTANT_BLINDING) bool shouldBlind(Imm32 imm) { diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARM64.h b/src/3rdparty/masm/assembler/MacroAssemblerARM64.h index e5a704292d..c0c68f6393 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerARM64.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerARM64.h @@ -26,7 +26,7 @@ #ifndef MacroAssemblerARM64_h #define MacroAssemblerARM64_h -#if ENABLE(ASSEMBLER) && (CPU(ARM64) || defined(V4_BOOTSTRAP)) +#if ENABLE(ASSEMBLER) && CPU(ARM64) #include "ARM64Assembler.h" #include "AbstractMacroAssembler.h" @@ -211,33 +211,6 @@ public: static bool shouldBlindForSpecificArch(uint32_t value) { return value >= 0x00ffffff; } static bool shouldBlindForSpecificArch(uint64_t value) { return value >= 0x00ffffff; } -#if defined(V4_BOOTSTRAP) - void loadPtr(ImplicitAddress address, RegisterID dest) - { - load64(address, dest); - } - - void subPtr(TrustedImm32 imm, RegisterID dest) - { - sub64(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID dest) - { - add64(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - add64(imm, src, dest); - } - - void storePtr(RegisterID src, ImplicitAddress address) - { - store64(src, address); - } -#endif - // Integer operations: void add32(RegisterID a, RegisterID b, RegisterID dest) @@ -1126,6 +1099,14 @@ public: m_assembler.ldrh(dest, address.base, memoryTempRegister); } + void load16(ExtendedAddress address, RegisterID dest) + { + moveToCachedReg(TrustedImmPtr(reinterpret_cast<void*>(address.offset)), m_cachedMemoryTempRegister); + m_assembler.ldrh(dest, memoryTempRegister, address.base, ARM64Assembler::UXTX, 1); + if (dest == memoryTempRegister) + 
m_cachedMemoryTempRegister.invalidate(); + } + void load16Unaligned(ImplicitAddress address, RegisterID dest) { load16(address, dest); @@ -2814,7 +2795,6 @@ public: return branch32(cond, left, dataTempRegister); } -#if !defined(V4_BOOTSTRAP) PatchableJump patchableBranchPtr(RelationalCondition cond, Address left, TrustedImmPtr right) { m_makeJumpPatchable = true; @@ -2822,7 +2802,6 @@ public: m_makeJumpPatchable = false; return PatchableJump(result); } -#endif PatchableJump patchableBranchTest32(ResultCondition cond, RegisterID reg, TrustedImm32 mask = TrustedImm32(-1)) { diff --git a/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h b/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h index 99801a0e3b..6232834fde 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerARMv7.h @@ -27,7 +27,7 @@ #ifndef MacroAssemblerARMv7_h #define MacroAssemblerARMv7_h -#if ENABLE(ASSEMBLER) && (CPU(ARM_THUMB2) || defined(V4_BOOTSTRAP)) +#if ENABLE(ASSEMBLER) && CPU(ARM_THUMB2) #include "ARMv7Assembler.h" #include "AbstractMacroAssembler.h" @@ -162,41 +162,12 @@ public: { add32(imm, dest, dest); } - -#if defined(V4_BOOTSTRAP) - void loadPtr(ImplicitAddress address, RegisterID dest) - { - load32(address, dest); - } - - void subPtr(TrustedImm32 imm, RegisterID dest) - { - sub32(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID dest) - { - add32(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - add32(imm, src, dest); - } - - void storePtr(RegisterID src, ImplicitAddress address) - { - store32(src, address); - } -#endif -#if !defined(V4_BOOTSTRAP) void add32(AbsoluteAddress src, RegisterID dest) { load32(src.m_ptr, dataTempRegister); add32(dataTempRegister, dest); } -#endif void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { @@ -237,7 +208,6 @@ public: add32(dataTempRegister, dest); } -#if !defined(V4_BOOTSTRAP) void add32(TrustedImm32 imm, AbsoluteAddress address) { 
load32(address.m_ptr, dataTempRegister); @@ -282,7 +252,6 @@ public: m_assembler.adc(dataTempRegister, dataTempRegister, ARMThumbImmediate::makeEncodedImm(imm.m_value >> 31)); m_assembler.str(dataTempRegister, addressTempRegister, ARMThumbImmediate::makeUInt12(4)); } -#endif void and32(RegisterID op1, RegisterID op2, RegisterID dest) { @@ -384,7 +353,6 @@ public: or32(dataTempRegister, dest); } -#if !defined(V4_BOOTSTRAP) void or32(RegisterID src, AbsoluteAddress dest) { move(TrustedImmPtr(dest.m_ptr), addressTempRegister); @@ -392,7 +360,6 @@ public: or32(src, dataTempRegister); store32(dataTempRegister, addressTempRegister); } -#endif void or32(TrustedImm32 imm, RegisterID dest) { @@ -504,7 +471,6 @@ public: sub32(dataTempRegister, dest); } -#if !defined(V4_BOOTSTRAP) void sub32(TrustedImm32 imm, AbsoluteAddress address) { load32(address.m_ptr, dataTempRegister); @@ -521,7 +487,6 @@ public: store32(dataTempRegister, address.m_ptr); } -#endif void xor32(Address src, RegisterID dest) { @@ -698,13 +663,11 @@ public: load16(setupArmAddress(address), dest); } -#if !defined(V4_BOOTSTRAP) void load32(const void* address, RegisterID dest) { move(TrustedImmPtr(address), addressTempRegister); m_assembler.ldr(dest, addressTempRegister, ARMThumbImmediate::makeUInt16(0)); } -#endif ConvertibleLoadLabel convertibleLoadPtr(Address address, RegisterID dest) { @@ -809,7 +772,6 @@ public: store32(dataTempRegister, setupArmAddress(address)); } -#if !defined(V4_BOOTSTRAP) void store32(RegisterID src, const void* address) { move(TrustedImmPtr(address), addressTempRegister); @@ -821,7 +783,6 @@ public: move(imm, dataTempRegister); store32(dataTempRegister, address); } -#endif void store8(RegisterID src, BaseIndex address) { @@ -839,7 +800,6 @@ public: store8(dataTempRegister, address); } -#if !defined(V4_BOOTSTRAP) void store8(RegisterID src, void* address) { move(TrustedImmPtr(address), addressTempRegister); @@ -851,7 +811,6 @@ public: move(imm, dataTempRegister); 
store8(dataTempRegister, address); } -#endif void store16(RegisterID src, BaseIndex address) { @@ -949,13 +908,11 @@ public: m_assembler.vmov(dest, src); } -#if !defined(V4_BOOTSTRAP) void loadDouble(const void* address, FPRegisterID dest) { move(TrustedImmPtr(address), addressTempRegister); m_assembler.vldr(dest, addressTempRegister, 0); } -#endif void storeDouble(FPRegisterID src, ImplicitAddress address) { @@ -987,13 +944,11 @@ public: m_assembler.fsts(ARMRegisters::asSingle(src), base, offset); } -#if !defined(V4_BOOTSTRAP) void storeDouble(FPRegisterID src, const void* address) { move(TrustedImmPtr(address), addressTempRegister); storeDouble(src, addressTempRegister); } -#endif void storeDouble(FPRegisterID src, BaseIndex address) { @@ -1027,13 +982,11 @@ public: m_assembler.vadd(dest, op1, op2); } -#if !defined(V4_BOOTSTRAP) void addDouble(AbsoluteAddress address, FPRegisterID dest) { loadDouble(address.m_ptr, fpTempRegister); m_assembler.vadd(dest, dest, fpTempRegister); } -#endif void divDouble(FPRegisterID src, FPRegisterID dest) { @@ -1112,7 +1065,6 @@ public: m_assembler.vcvt_signedToFloatingPoint(dest, fpTempRegisterAsSingle()); } -#if !defined(V4_BOOTSTRAP) void convertInt32ToDouble(AbsoluteAddress address, FPRegisterID dest) { // Fixme: load directly into the fpr! 
@@ -1120,7 +1072,6 @@ public: m_assembler.vmov(fpTempRegister, dataTempRegister, dataTempRegister); m_assembler.vcvt_signedToFloatingPoint(dest, fpTempRegisterAsSingle()); } -#endif void convertUInt32ToDouble(RegisterID src, FPRegisterID dest, RegisterID /*scratch*/) { @@ -1316,12 +1267,10 @@ public: m_assembler.mov(dest, src); } -#if !defined(V4_BOOTSTRAP) void move(TrustedImmPtr imm, RegisterID dest) { move(TrustedImm32(imm), dest); } -#endif void swap(RegisterID reg1, RegisterID reg2) { @@ -1462,7 +1411,6 @@ public: return branch32(cond, addressTempRegister, right); } -#if !defined(V4_BOOTSTRAP) Jump branch32(RelationalCondition cond, AbsoluteAddress left, RegisterID right) { load32(left.m_ptr, dataTempRegister); @@ -1475,7 +1423,6 @@ public: load32(left.m_ptr, addressTempRegister); return branch32(cond, addressTempRegister, right); } -#endif Jump branch8(RelationalCondition cond, RegisterID left, TrustedImm32 right) { @@ -1532,7 +1479,6 @@ public: return branchTest32(cond, addressTempRegister, mask); } -#if !defined(V4_BOOTSTRAP) Jump branchTest8(ResultCondition cond, AbsoluteAddress address, TrustedImm32 mask = TrustedImm32(-1)) { // use addressTempRegister incase the branchTest8 we call uses dataTempRegister. 
:-/ @@ -1540,7 +1486,6 @@ public: load8(Address(addressTempRegister), addressTempRegister); return branchTest32(cond, addressTempRegister, mask); } -#endif void jump(RegisterID target) { @@ -1554,14 +1499,12 @@ public: m_assembler.bx(dataTempRegister); } -#if !defined(V4_BOOTSTRAP) void jump(AbsoluteAddress address) { move(TrustedImmPtr(address.m_ptr), dataTempRegister); load32(Address(dataTempRegister), dataTempRegister); m_assembler.bx(dataTempRegister); } -#endif // Arithmetic control flow operations: @@ -1602,7 +1545,6 @@ public: return branchAdd32(cond, dest, imm, dest); } -#if !defined(V4_BOOTSTRAP) Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, AbsoluteAddress dest) { // Move the high bits of the address into addressTempRegister, @@ -1628,7 +1570,6 @@ public: return Jump(makeBranch(cond)); } -#endif Jump branchMul32(ResultCondition cond, RegisterID src1, RegisterID src2, RegisterID dest) { @@ -1799,7 +1740,6 @@ public: return DataLabel32(this); } -#if !defined(V4_BOOTSTRAP) ALWAYS_INLINE DataLabelPtr moveWithPatch(TrustedImmPtr imm, RegisterID dst) { padBeforePatch(); @@ -1827,7 +1767,6 @@ public: m_makeJumpPatchable = false; return PatchableJump(result); } -#endif PatchableJump patchableBranchTest32(ResultCondition cond, RegisterID reg, TrustedImm32 mask = TrustedImm32(-1)) { @@ -1845,7 +1784,6 @@ public: return PatchableJump(result); } -#if !defined(V4_BOOTSTRAP) PatchableJump patchableBranchPtrWithPatch(RelationalCondition cond, Address left, DataLabelPtr& dataLabel, TrustedImmPtr initialRightValue = TrustedImmPtr(0)) { m_makeJumpPatchable = true; @@ -1853,7 +1791,6 @@ public: m_makeJumpPatchable = false; return PatchableJump(result); } -#endif PatchableJump patchableJump() { @@ -1864,7 +1801,6 @@ public: return PatchableJump(result); } -#if !defined(V4_BOOTSTRAP) ALWAYS_INLINE DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { DataLabelPtr label = moveWithPatch(initialValue, dataTempRegister); @@ -1872,7 
+1808,6 @@ public: return label; } ALWAYS_INLINE DataLabelPtr storePtrWithPatch(ImplicitAddress address) { return storePtrWithPatch(TrustedImmPtr(0), address); } -#endif ALWAYS_INLINE Call tailRecursiveCall() { @@ -1893,7 +1828,6 @@ public: return m_assembler.executableOffsetFor(location); } -#if !defined(V4_BOOTSTRAP) static FunctionPtr readCallTarget(CodeLocationCall call) { return FunctionPtr(reinterpret_cast<void(*)()>(ARMv7Assembler::readCallTarget(call.dataLocation()))); @@ -1906,7 +1840,6 @@ public: const unsigned twoWordOpSize = 4; return label.labelAtOffset(-twoWordOpSize * 2); } -#endif static void revertJumpReplacementToBranchPtrWithPatch(CodeLocationLabel instructionStart, RegisterID rd, void* initialValue) { @@ -2024,7 +1957,6 @@ private: template <typename, template <typename> class> friend class LinkBufferBase; friend class RepatchBuffer; -#if !defined(V4_BOOTSTRAP) static void linkCall(void* code, Call call, FunctionPtr function) { ARMv7Assembler::linkCall(code, call.m_label, function.value()); @@ -2039,7 +1971,6 @@ private: { ARMv7Assembler::relinkCall(call.dataLocation(), destination.executableAddress()); } -#endif bool m_makeJumpPatchable; }; diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86.h b/src/3rdparty/masm/assembler/MacroAssemblerX86.h index e3e0bfe5e1..5cffa787ec 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerX86.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerX86.h @@ -55,38 +55,6 @@ public: using MacroAssemblerX86Common::convertInt32ToDouble; using MacroAssemblerX86Common::branchTest8; -#if defined(V4_BOOTSTRAP) - void loadPtr(ImplicitAddress address, RegisterID dest) - { - load32(address, dest); - } - - void subPtr(TrustedImm32 imm, RegisterID dest) - { - sub32(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID dest) - { - add32(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - add32(imm, src, dest); - } - - void storePtr(RegisterID src, ImplicitAddress address) - { - 
store32(src, address); - } - - Jump branchTest8(ResultCondition cond, ExtendedAddress address, TrustedImm32 mask = TrustedImm32(-1)) - { - return branchTest8(cond, Address(address.base, address.offset), mask); - } -#endif - void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { m_assembler.leal_mr(imm.m_value, src, dest); diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h index f4349e1f93..0a6db0805b 100644 --- a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h +++ b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h @@ -53,33 +53,6 @@ public: using MacroAssemblerX86Common::loadDouble; using MacroAssemblerX86Common::convertInt32ToDouble; -#if defined(V4_BOOTSTRAP) - void loadPtr(ImplicitAddress address, RegisterID dest) - { - load64(address, dest); - } - - void subPtr(TrustedImm32 imm, RegisterID dest) - { - sub64(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID dest) - { - add64(imm, dest); - } - - void addPtr(TrustedImm32 imm, RegisterID src, RegisterID dest) - { - add64(imm, src, dest); - } - - void storePtr(RegisterID src, ImplicitAddress address) - { - store64(src, address); - } -#endif - void add32(TrustedImm32 imm, AbsoluteAddress address) { move(TrustedImmPtr(address.m_ptr), scratchRegister); @@ -116,6 +89,23 @@ public: sub32(imm, Address(scratchRegister)); } + void load16(ExtendedAddress address, RegisterID dest) + { + TrustedImmPtr addr(reinterpret_cast<void*>(address.offset)); + MacroAssemblerX86Common::move(addr, scratchRegister); + MacroAssemblerX86Common::load16(BaseIndex(scratchRegister, address.base, TimesTwo), dest); + } + + void load16(BaseIndex address, RegisterID dest) + { + MacroAssemblerX86Common::load16(address, dest); + } + + void load16(Address address, RegisterID dest) + { + MacroAssemblerX86Common::load16(address, dest); + } + void load32(const void* address, RegisterID dest) { if (dest == X86Registers::eax) diff --git 
a/src/3rdparty/masm/assembler/X86Assembler.h b/src/3rdparty/masm/assembler/X86Assembler.h index 2257cb2b9a..e8ae687036 100644 --- a/src/3rdparty/masm/assembler/X86Assembler.h +++ b/src/3rdparty/masm/assembler/X86Assembler.h @@ -255,47 +255,6 @@ public: { } -#if defined(V4_BOOTSTRAP) - template <typename LabelType> - class Jump { - template<class TemplateAssemblerType> - friend class AbstractMacroAssembler; - friend class Call; - template <typename, template <typename> class> friend class LinkBufferBase; - public: - Jump() - { - } - - Jump(AssemblerLabel jmp) - : m_label(jmp) - { - } - - LabelType label() const - { - LabelType result; - result.m_label = m_label; - return result; - } - - void link(AbstractMacroAssembler<X86Assembler>* masm) const - { - masm->m_assembler.linkJump(m_label, masm->m_assembler.label()); - } - - void linkTo(LabelType label, AbstractMacroAssembler<X86Assembler>* masm) const - { - masm->m_assembler.linkJump(m_label, label.label()); - } - - bool isSet() const { return m_label.isSet(); } - - private: - AssemblerLabel m_label; - }; -#endif - // Stack operations: void push_r(RegisterID reg) diff --git a/src/3rdparty/masm/masm-defs.pri b/src/3rdparty/masm/masm-defs.pri index 08c46a7ac2..90a795c6ce 100644 --- a/src/3rdparty/masm/masm-defs.pri +++ b/src/3rdparty/masm/masm-defs.pri @@ -33,10 +33,6 @@ disassembler { DEFINES += WTF_USE_UDIS86=0 } -force-compile-jit { - DEFINES += V4_FORCE_COMPILE_JIT -} - INCLUDEPATH += $$PWD/disassembler INCLUDEPATH += $$PWD/disassembler/udis86 INCLUDEPATH += $$_OUT_PWD diff --git a/src/3rdparty/masm/masm.pri b/src/3rdparty/masm/masm.pri index 0e63ac2ce5..1df4585aae 100644 --- a/src/3rdparty/masm/masm.pri +++ b/src/3rdparty/masm/masm.pri @@ -77,7 +77,6 @@ SOURCES += $$PWD/disassembler/ARM64Disassembler.cpp SOURCES += $$PWD/disassembler/ARM64/A64DOpcode.cpp HEADERS += $$PWD/disassembler/ARM64/A64DOpcode.h -!qmldevtools_build { SOURCES += $$PWD/yarr/YarrCanonicalizeUCS2.cpp \ $$PWD/yarr/YarrCanonicalizeUnicode.cpp \ 
$$PWD/yarr/YarrInterpreter.cpp \ @@ -94,7 +93,6 @@ HEADERS += $$PWD/yarr/Yarr.h \ $$PWD/yarr/YarrPattern.h \ $$PWD/yarr/YarrSyntaxChecker.h \ $$PWD/yarr/YarrUnicodeProperties.h -} # # Generate RegExpJitTables.h @@ -128,8 +126,3 @@ QMAKE_EXTRA_COMPILERS += retgen } } } - -linux { - requires(qtConfig(dlopen)) - QMAKE_USE_PRIVATE += libdl -} diff --git a/src/3rdparty/masm/stubs/wtf/Vector.h b/src/3rdparty/masm/stubs/wtf/Vector.h index f4f4dc5cf4..2fead9f6ba 100644 --- a/src/3rdparty/masm/stubs/wtf/Vector.h +++ b/src/3rdparty/masm/stubs/wtf/Vector.h @@ -109,6 +109,15 @@ public: inline bool isEmpty() const { return this->empty(); } inline T &last() { return *(this->begin() + this->size() - 1); } + + bool contains(const T &value) const + { + for (const T &inVector : *this) { + if (inVector == value) + return true; + } + return false; + } }; template <typename T, int capacity> diff --git a/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp b/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp index 3b2a73a39a..d59fdcd675 100644 --- a/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp +++ b/src/3rdparty/masm/wtf/OSAllocatorPosix.cpp @@ -74,21 +74,9 @@ static int memfdForUsage(size_t bytes, OSAllocator::Usage usage) break; } - // try to get our own library name by giving dladdr a pointer pointing to - // something we know to be in it (using a pointer to string data) - static const char *libname = [=]() { - Dl_info info; - if (dladdr(type, &info) == 0) - info.dli_fname = nullptr; - return info.dli_fname; - }(); - char buf[PATH_MAX]; strcpy(buf, type); - if (libname) - strcat(buf, libname); - else - strcat(buf, "QtQml"); + strcat(buf, "QtQml"); int fd = syscall(SYS_memfd_create, buf, MFD_CLOEXEC); if (fd != -1) { diff --git a/src/3rdparty/masm/wtf/Platform.h b/src/3rdparty/masm/wtf/Platform.h index d5f69927db..ab1da2198a 100644 --- a/src/3rdparty/masm/wtf/Platform.h +++ b/src/3rdparty/masm/wtf/Platform.h @@ -438,6 +438,10 @@ #define WTF_OS_WINDOWS 1 #endif +#ifdef __rtems__ +#define WTF_OS_RTEMS 1 
+#endif + #define WTF_OS_WIN ERROR "USE WINDOWS WITH OS NOT WIN" #define WTF_OS_MAC ERROR "USE MAC_OS_X WITH OS NOT MAC" @@ -451,6 +455,7 @@ || OS(NETBSD) \ || OS(OPENBSD) \ || OS(QNX) \ + || OS(RTEMS) \ || OS(SOLARIS) \ || defined(unix) \ || defined(__unix) \ @@ -1051,6 +1056,7 @@ #if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS)) /* Enable JIT'ing Regular Expressions that have nested parenthesis. */ #define ENABLE_YARR_JIT_ALL_PARENS_EXPRESSIONS 1 +#define ENABLE_YARR_JIT_BACKREFERENCES 1 #endif #endif diff --git a/src/3rdparty/masm/yarr/YarrCanonicalize.h b/src/3rdparty/masm/yarr/YarrCanonicalize.h index fb5e0231ac..cbd279edca 100644 --- a/src/3rdparty/masm/yarr/YarrCanonicalize.h +++ b/src/3rdparty/masm/yarr/YarrCanonicalize.h @@ -53,6 +53,7 @@ struct CanonicalizationRange { extern const size_t UCS2_CANONICALIZATION_RANGES; extern const UChar32* const ucs2CharacterSetInfo[]; extern const CanonicalizationRange ucs2RangeInfo[]; +extern const uint16_t canonicalTableLChar[256]; extern const size_t UNICODE_CANONICALIZATION_RANGES; extern const UChar32* const unicodeCharacterSetInfo[]; diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp index d91c771590..0eb59f38d2 100644 --- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp +++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved. + * Copyright (C) 2012-2018 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -44,9 +44,17 @@ const UChar32 ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 }; const UChar32 ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 }; const UChar32 ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 }; const UChar32 ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 }; -const UChar32 ucs2CharacterSet14[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; +const UChar32 ucs2CharacterSet14[] = { 0x0412, 0x0432, 0x1c80, 0 }; +const UChar32 ucs2CharacterSet15[] = { 0x0414, 0x0434, 0x1c81, 0 }; +const UChar32 ucs2CharacterSet16[] = { 0x041e, 0x043e, 0x1c82, 0 }; +const UChar32 ucs2CharacterSet17[] = { 0x0421, 0x0441, 0x1c83, 0 }; +const UChar32 ucs2CharacterSet18[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 }; +const UChar32 ucs2CharacterSet19[] = { 0x042a, 0x044a, 0x1c86, 0 }; +const UChar32 ucs2CharacterSet20[] = { 0x0462, 0x0463, 0x1c87, 0 }; +const UChar32 ucs2CharacterSet21[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; +const UChar32 ucs2CharacterSet22[] = { 0x1c88, 0xa64a, 0xa64b, 0 }; -static const size_t UCS2_CANONICALIZATION_SETS = 15; +static const size_t UCS2_CANONICALIZATION_SETS = 23; const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet0, ucs2CharacterSet1, @@ -63,9 +71,17 @@ const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet12, ucs2CharacterSet13, ucs2CharacterSet14, + ucs2CharacterSet15, + ucs2CharacterSet16, + ucs2CharacterSet17, + ucs2CharacterSet18, + ucs2CharacterSet19, + ucs2CharacterSet20, + ucs2CharacterSet21, + ucs2CharacterSet22, }; -const size_t UCS2_CANONICALIZATION_RANGES = 391; +const size_t UCS2_CANONICALIZATION_RANGES = 448; const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x0000, 0x0040, 0x0000, CanonicalizeUnique }, { 0x0041, 0x005a, 0x0020, CanonicalizeRangeLo }, @@ -182,7 +198,7 @@ const 
CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x0267, 0x0267, 0x0000, CanonicalizeUnique }, { 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi }, { 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi }, - { 0x026a, 0x026a, 0x0000, CanonicalizeUnique }, + { 0x026a, 0x026a, 0xa544, CanonicalizeRangeLo }, { 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo }, { 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo }, { 0x026d, 0x026e, 0x0000, CanonicalizeUnique }, @@ -206,7 +222,8 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi }, { 0x028d, 0x0291, 0x0000, CanonicalizeUnique }, { 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi }, - { 0x0293, 0x029d, 0x0000, CanonicalizeUnique }, + { 0x0293, 0x029c, 0x0000, CanonicalizeUnique }, + { 0x029d, 0x029d, 0xa515, CanonicalizeRangeLo }, { 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo }, { 0x029f, 0x0344, 0x0000, CanonicalizeUnique }, { 0x0345, 0x0345, 0x0007, CanonicalizeSet }, @@ -288,10 +305,34 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique }, { 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi }, { 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo }, - { 0x0410, 0x042f, 0x0020, CanonicalizeRangeLo }, - { 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi }, + { 0x0410, 0x0411, 0x0020, CanonicalizeRangeLo }, + { 0x0412, 0x0412, 0x000e, CanonicalizeSet }, + { 0x0413, 0x0413, 0x0020, CanonicalizeRangeLo }, + { 0x0414, 0x0414, 0x000f, CanonicalizeSet }, + { 0x0415, 0x041d, 0x0020, CanonicalizeRangeLo }, + { 0x041e, 0x041e, 0x0010, CanonicalizeSet }, + { 0x041f, 0x0420, 0x0020, CanonicalizeRangeLo }, + { 0x0421, 0x0421, 0x0011, CanonicalizeSet }, + { 0x0422, 0x0422, 0x0012, CanonicalizeSet }, + { 0x0423, 0x0429, 0x0020, CanonicalizeRangeLo }, + { 0x042a, 0x042a, 0x0013, CanonicalizeSet }, + { 0x042b, 0x042f, 0x0020, CanonicalizeRangeLo }, + { 0x0430, 0x0431, 0x0020, CanonicalizeRangeHi }, + { 0x0432, 
0x0432, 0x000e, CanonicalizeSet }, + { 0x0433, 0x0433, 0x0020, CanonicalizeRangeHi }, + { 0x0434, 0x0434, 0x000f, CanonicalizeSet }, + { 0x0435, 0x043d, 0x0020, CanonicalizeRangeHi }, + { 0x043e, 0x043e, 0x0010, CanonicalizeSet }, + { 0x043f, 0x0440, 0x0020, CanonicalizeRangeHi }, + { 0x0441, 0x0441, 0x0011, CanonicalizeSet }, + { 0x0442, 0x0442, 0x0012, CanonicalizeSet }, + { 0x0443, 0x0449, 0x0020, CanonicalizeRangeHi }, + { 0x044a, 0x044a, 0x0013, CanonicalizeSet }, + { 0x044b, 0x044f, 0x0020, CanonicalizeRangeHi }, { 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi }, - { 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0460, 0x0461, 0x0000, CanonicalizeAlternatingAligned }, + { 0x0462, 0x0463, 0x0014, CanonicalizeSet }, + { 0x0464, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, { 0x0482, 0x0489, 0x0000, CanonicalizeUnique }, { 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned }, { 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo }, @@ -308,16 +349,38 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo }, { 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique }, { 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo }, - { 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique }, + { 0x10ce, 0x10cf, 0x0000, CanonicalizeUnique }, + { 0x10d0, 0x10fa, 0x0bc0, CanonicalizeRangeLo }, + { 0x10fb, 0x10fc, 0x0000, CanonicalizeUnique }, + { 0x10fd, 0x10ff, 0x0bc0, CanonicalizeRangeLo }, + { 0x1100, 0x139f, 0x0000, CanonicalizeUnique }, + { 0x13a0, 0x13ef, 0x97d0, CanonicalizeRangeLo }, + { 0x13f0, 0x13f5, 0x0008, CanonicalizeRangeLo }, + { 0x13f6, 0x13f7, 0x0000, CanonicalizeUnique }, + { 0x13f8, 0x13fd, 0x0008, CanonicalizeRangeHi }, + { 0x13fe, 0x1c7f, 0x0000, CanonicalizeUnique }, + { 0x1c80, 0x1c80, 0x000e, CanonicalizeSet }, + { 0x1c81, 0x1c81, 0x000f, CanonicalizeSet }, + { 0x1c82, 0x1c82, 0x0010, CanonicalizeSet }, + { 0x1c83, 0x1c83, 0x0011, CanonicalizeSet }, + { 0x1c84, 0x1c85, 0x0012, CanonicalizeSet }, + 
{ 0x1c86, 0x1c86, 0x0013, CanonicalizeSet }, + { 0x1c87, 0x1c87, 0x0014, CanonicalizeSet }, + { 0x1c88, 0x1c88, 0x0016, CanonicalizeSet }, + { 0x1c89, 0x1c8f, 0x0000, CanonicalizeUnique }, + { 0x1c90, 0x1cba, 0x0bc0, CanonicalizeRangeHi }, + { 0x1cbb, 0x1cbc, 0x0000, CanonicalizeUnique }, + { 0x1cbd, 0x1cbf, 0x0bc0, CanonicalizeRangeHi }, + { 0x1cc0, 0x1d78, 0x0000, CanonicalizeUnique }, { 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo }, { 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique }, { 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo }, { 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique }, { 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned }, - { 0x1e60, 0x1e61, 0x000e, CanonicalizeSet }, + { 0x1e60, 0x1e61, 0x0015, CanonicalizeSet }, { 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned }, { 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique }, - { 0x1e9b, 0x1e9b, 0x000e, CanonicalizeSet }, + { 0x1e9b, 0x1e9b, 0x0015, CanonicalizeSet }, { 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique }, { 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned }, { 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo }, @@ -428,7 +491,9 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique }, { 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi }, { 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique }, - { 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa640, 0xa649, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa64a, 0xa64b, 0x0016, CanonicalizeSet }, + { 0xa64c, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, { 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique }, { 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned }, { 0xa69c, 0xa721, 0x0000, CanonicalizeUnique }, @@ -450,15 +515,42 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi }, { 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi }, { 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi }, - { 0xa7ae, 0xa7af, 
0x0000, CanonicalizeUnique }, + { 0xa7ae, 0xa7ae, 0xa544, CanonicalizeRangeHi }, + { 0xa7af, 0xa7af, 0x0000, CanonicalizeUnique }, { 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi }, { 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi }, - { 0xa7b2, 0xff20, 0x0000, CanonicalizeUnique }, + { 0xa7b2, 0xa7b2, 0xa515, CanonicalizeRangeHi }, + { 0xa7b3, 0xa7b3, 0x03a0, CanonicalizeRangeLo }, + { 0xa7b4, 0xa7b9, 0x0000, CanonicalizeAlternatingAligned }, + { 0xa7ba, 0xab52, 0x0000, CanonicalizeUnique }, + { 0xab53, 0xab53, 0x03a0, CanonicalizeRangeHi }, + { 0xab54, 0xab6f, 0x0000, CanonicalizeUnique }, + { 0xab70, 0xabbf, 0x97d0, CanonicalizeRangeHi }, + { 0xabc0, 0xff20, 0x0000, CanonicalizeUnique }, { 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo }, { 0xff3b, 0xff40, 0x0000, CanonicalizeUnique }, { 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi }, { 0xff5b, 0xffff, 0x0000, CanonicalizeUnique }, }; +const uint16_t canonicalTableLChar[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 
0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0x39c, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x178 +}; + } } // JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js index dc578cfece..b92d8bdd4f 100644 --- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js +++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012, 2016 Apple Inc. All rights reserved. + * Copyright (C) 2012-2018 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,7 +27,7 @@ function printHeader() { var copyright = ( "/*" + "\n" + - " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." + "\n" + + " * Copyright (C) 2012-2018 Apple Inc. All rights reserved." + "\n" + " *" + "\n" + " * Redistribution and use in source and binary forms, with or without" + "\n" + " * modification, are permitted provided that the following conditions" + "\n" + @@ -183,6 +183,23 @@ function createTables(prefix, maxValue, canonicalGroups) } print("};"); print(); + // Create canonical table for LChar domain + let line = "const uint16_t canonicalTableLChar[256] = {"; + for (let i = 0; i < 256; i++) { + if (!(i % 16)) { + print(line); + line = " "; + } + let canonicalChar = canonicalize(i); + line = line + (canonicalChar < 16 ? 
"0x0" : "0x") + canonicalChar.toString(16); + if ((i % 16) != 15) + line += ", "; + else if (i != 255) + line += ","; + } + print(line); + print("};"); + print(); } printHeader(); diff --git a/src/3rdparty/masm/yarr/YarrErrorCode.h b/src/3rdparty/masm/yarr/YarrErrorCode.h index 48f2bb7900..3f06a6bff1 100644 --- a/src/3rdparty/masm/yarr/YarrErrorCode.h +++ b/src/3rdparty/masm/yarr/YarrErrorCode.h @@ -60,6 +60,13 @@ inline bool hasError(ErrorCode errorCode) { return errorCode != ErrorCode::NoError; } + +inline bool hasHardError(ErrorCode errorCode) +{ + // TooManyDisjunctions means that we ran out stack compiling. + // All other errors are due to problems in the expression. + return hasError(errorCode) && errorCode != ErrorCode::TooManyDisjunctions; +} JS_EXPORT_PRIVATE JSObject* errorToThrow(ExecState*, ErrorCode); } } // namespace JSC::Yarr diff --git a/src/3rdparty/masm/yarr/YarrInterpreter.cpp b/src/3rdparty/masm/yarr/YarrInterpreter.cpp index 4d3652fcbc..cdcd16af64 100644 --- a/src/3rdparty/masm/yarr/YarrInterpreter.cpp +++ b/src/3rdparty/masm/yarr/YarrInterpreter.cpp @@ -32,12 +32,12 @@ #include "Yarr.h" #include "YarrCanonicalize.h" #include <wtf/BumpPointerAllocator.h> +#include <wtf/CheckedArithmetic.h> #include <wtf/DataLog.h> +#include <wtf/StdLibExtras.h> #include <wtf/text/CString.h> #include <wtf/text/WTFString.h> -using namespace WTF; - namespace JSC { namespace Yarr { template<typename CharType> @@ -67,17 +67,23 @@ public: struct DisjunctionContext { - DisjunctionContext() - : term(0) - { - } + DisjunctionContext() = default; void* operator new(size_t, void* where) { return where; } - int term; + static size_t allocationSize(unsigned numberOfFrames) + { + static_assert(alignof(DisjunctionContext) <= sizeof(void*), ""); + size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet(); + size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(rawSize); + RELEASE_ASSERT(roundedSize 
>= rawSize); + return roundedSize; + } + + int term { 0 }; unsigned matchBegin; unsigned matchEnd; uintptr_t frame[1]; @@ -85,7 +91,7 @@ public: DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) { - size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) DisjunctionContext(); @@ -99,7 +105,6 @@ public: struct ParenthesesDisjunctionContext { ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term) - : next(0) { unsigned firstSubpatternId = term.atom.subpatternId; unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; @@ -125,16 +130,25 @@ public: DisjunctionContext* getDisjunctionContext(ByteTerm& term) { - return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); + return bitwise_cast<DisjunctionContext*>(bitwise_cast<uintptr_t>(this) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)); } - ParenthesesDisjunctionContext* next; + static size_t allocationSize(unsigned numberOfSubpatterns) + { + static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), ""); + size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * 2U) * sizeof(unsigned)).unsafeGet(); + size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(rawSize); + RELEASE_ASSERT(roundedSize >= rawSize); + return roundedSize; + } + + ParenthesesDisjunctionContext* next { nullptr }; unsigned subpatternBackup[1]; }; ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) { - size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + 
(term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t); + size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet(); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); @@ -1630,7 +1644,6 @@ public: , unicode(pattern->unicode()) , output(output) , input(input, start, length, pattern->unicode()) - , allocatorPool(0) , startOffset(start) , remainingMatchCount(matchLimit) { @@ -1641,7 +1654,7 @@ private: bool unicode; unsigned* output; InputStream input; - BumpPointerPool* allocatorPool; + WTF::BumpPointerPool* allocatorPool { nullptr }; unsigned startOffset; unsigned remainingMatchCount; }; @@ -1740,7 +1753,7 @@ public: void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) { - unsigned beginTerm = m_bodyDisjunction->terms.size(); + int beginTerm = m_bodyDisjunction->terms.size(); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; diff --git a/src/3rdparty/masm/yarr/YarrJIT.cpp b/src/3rdparty/masm/yarr/YarrJIT.cpp index da65b772f7..1c8138c66e 100644 --- a/src/3rdparty/masm/yarr/YarrJIT.cpp +++ b/src/3rdparty/masm/yarr/YarrJIT.cpp @@ -37,15 +37,12 @@ #if ENABLE(YARR_JIT) -using namespace WTF; - namespace JSC { namespace Yarr { template<YarrJITCompileMode compileMode> class YarrGenerator : private DefaultMacroAssembler { - friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& 
numSubpatterns, const char*& error, bool ignoreCase, bool multiline); -#if CPU(ARM) +#if CPU(ARM_THUMB2) static const RegisterID input = ARMRegisters::r0; static const RegisterID index = ARMRegisters::r1; static const RegisterID length = ARMRegisters::r2; @@ -477,6 +474,12 @@ class YarrGenerator : private DefaultMacroAssembler { return branch32(BelowOrEqual, index, length); } + Jump checkNotEnoughInput(RegisterID additionalAmount) + { + add32(index, additionalAmount); + return branch32(Above, additionalAmount, length); + } + Jump checkInput() { return branch32(BelowOrEqual, index, length); @@ -559,6 +562,16 @@ class YarrGenerator : private DefaultMacroAssembler { } #endif + void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index) + { + BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg); + + if (m_charSize == Char8) + load8(address, resultReg); + else + load16Unaligned(address, resultReg); + } + void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index) { BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg); @@ -809,16 +822,16 @@ class YarrGenerator : private DefaultMacroAssembler { // The operation, as a YarrOpCode, and also a reference to the PatternTerm. YarrOpCode m_op; - PatternTerm* m_term; + PatternTerm* m_term = nullptr; // For alternatives, this holds the PatternAlternative and doubly linked // references to this alternative's siblings. In the case of the // OpBodyAlternativeEnd node at the end of a section of repeating nodes, // m_nextOp will reference the OpBodyAlternativeBegin node of the first // repeating alternative. 
- PatternAlternative* m_alternative; - size_t m_previousOp; - size_t m_nextOp; + PatternAlternative* m_alternative = nullptr; + size_t m_previousOp = 0; + size_t m_nextOp = 0; // Used to record a set of Jumps out of the generated code, typically // used for jumps out to backtracking code, and a single reentry back @@ -1119,6 +1132,228 @@ class YarrGenerator : private DefaultMacroAssembler { backtrackTermDefault(opIndex); } +#if ENABLE(YARR_JIT_BACKREFERENCES) + void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + unsigned subpatternId = term->backReferenceSubpatternId; + + Label loop(this); + + readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex); + readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character); + + if (!m_pattern.ignoreCase()) + characterMatchFails.append(branch32(NotEqual, character, patternCharacter)); + else { + Jump charactersMatch = branch32(Equal, character, patternCharacter); + ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar)); + load16(characterTableEntry, character); + ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar)); + load16(patternTableEntry, patternCharacter); + characterMatchFails.append(branch32(NotEqual, character, patternCharacter)); + charactersMatch.link(this); + } + + + add32(TrustedImm32(1), index); + add32(TrustedImm32(1), patternIndex); + + branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this); + } + + void generateBackReference(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + if (m_pattern.ignoreCase() && m_charSize != Char8) { + m_failureReason = JITFailureReason::BackReference; + return; + } + + unsigned subpatternId = 
term->backReferenceSubpatternId; + unsigned parenthesesFrameLocation = term->frameLocation; + + const RegisterID characterOrTemp = regT0; + const RegisterID patternIndex = regT1; + const RegisterID patternTemp = regT2; + + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex()); + if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) + storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + + JumpList matches; + + if (term->quantityType != QuantifierNonGreedy) { + load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex); + load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp); + + // An empty match is successful without consuming characters + if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) { + matches.append(branch32(Equal, TrustedImm32(-1), patternIndex)); + matches.append(branch32(Equal, patternIndex, patternTemp)); + } else { + Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex); + Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp); + zeroLengthMatch.link(this); + storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + matches.append(jump()); + tryNonZeroMatch.link(this); + } + } + + switch (term->quantityType) { + case QuantifierFixedCount: { + Label outerLoop(this); + + // PatternTemp should contain pattern end index at this point + sub32(patternIndex, patternTemp); + if (m_checkedOffset - term->inputPosition) + sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp); + op.m_jumps.append(checkNotEnoughInput(patternTemp)); + + matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp); + + if (term->quantityMaxCount != 1) { + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp); + 
add32(TrustedImm32(1), characterOrTemp); + storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp)); + load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex); + load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp); + jump(outerLoop); + } + matches.link(this); + break; + } + + case QuantifierGreedy: { + JumpList incompleteMatches; + + Label outerLoop(this); + + // PatternTemp should contain pattern end index at this point + sub32(patternIndex, patternTemp); + if (m_checkedOffset - term->inputPosition) + sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp); + matches.append(checkNotEnoughInput(patternTemp)); + + matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp); + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp); + add32(TrustedImm32(1), characterOrTemp); + storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + if (term->quantityMaxCount != quantifyInfinite) + matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp)); + load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex); + load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp); + + // Store current index in frame for restoring after a partial match + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex()); + jump(outerLoop); + + incompleteMatches.link(this); + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index); + + matches.link(this); + op.m_reentry = label(); + break; + } + + case QuantifierNonGreedy: { + JumpList incompleteMatches; + + matches.append(jump()); + + op.m_reentry = label(); + + 
load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex); + load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp); + + // An empty match is successful without consuming characters + Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex); + Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp); + zeroLengthMatch.link(this); + storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + matches.append(jump()); + tryNonZeroMatch.link(this); + + // Check if we have input remaining to match + sub32(patternIndex, patternTemp); + if (m_checkedOffset - term->inputPosition) + sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp); + matches.append(checkNotEnoughInput(patternTemp)); + + storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex()); + + matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp); + + matches.append(jump()); + + incompleteMatches.link(this); + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index); + + matches.link(this); + break; + } + } + } + void backtrackBackReference(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + unsigned subpatternId = term->backReferenceSubpatternId; + + m_backtrackingState.link(this); + op.m_jumps.link(this); + + JumpList failures; + + unsigned parenthesesFrameLocation = term->frameLocation; + switch (term->quantityType) { + case QuantifierFixedCount: + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index); + break; + + case QuantifierGreedy: { + const RegisterID matchAmount = regT0; + const RegisterID patternStartIndex = regT1; + const RegisterID patternEndIndexOrLen = regT2; + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount); + 
failures.append(branchTest32(Zero, matchAmount)); + + load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex); + load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen); + sub32(patternStartIndex, patternEndIndexOrLen); + sub32(patternEndIndexOrLen, index); + + sub32(TrustedImm32(1), matchAmount); + storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + jump(op.m_reentry); + break; + } + + case QuantifierNonGreedy: { + const RegisterID matchAmount = regT0; + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount); + if (term->quantityMaxCount != quantifyInfinite) + failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount)); + add32(TrustedImm32(1), matchAmount); + storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex()); + jump(op.m_reentry); + break; + } + } + failures.link(this); + m_backtrackingState.fallthrough(); + } +#endif + void generatePatternCharacterOnce(size_t opIndex) { YarrOp& op = m_ops[opIndex]; @@ -1141,12 +1376,16 @@ class YarrGenerator : private DefaultMacroAssembler { } const RegisterID character = regT0; +#if CPU(X86_64) || CPU(ARM64) + unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4; +#else unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; - unsigned ignoreCaseMask = 0; +#endif + uint64_t ignoreCaseMask = 0; #if CPU(BIG_ENDIAN) - int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); + uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16); #else - int allCharacters = ch; + uint64_t allCharacters = ch; #endif unsigned numberCharacters; unsigned startTermPosition = term->inputPosition; @@ -1155,16 +1394,19 @@ class YarrGenerator : private DefaultMacroAssembler { // upper & lower case representations are converted to a character class. 
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode)); - if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) + if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) { #if CPU(BIG_ENDIAN) ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16); #else ignoreCaseMask |= 32; #endif + } for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { PatternTerm* nextTerm = nextOp->m_term; - + + // YarrJIT handles a decoded surrogate pair as one character if the unicode flag is enabled. + // Note that numberCharacters becomes 1 while the width of the pattern character becomes 32 bits in this case. if (nextTerm->type != PatternTerm::TypePatternCharacter || nextTerm->quantityType != QuantifierFixedCount || nextTerm->quantityMaxCount != 1 @@ -1192,49 +1434,132 @@ class YarrGenerator : private DefaultMacroAssembler { // upper & lower case representations are converted to a character class. 
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode)); - allCharacters |= (currentCharacter << shiftAmount); + allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount); if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter))) - ignoreCaseMask |= 32 << shiftAmount; + ignoreCaseMask |= 32ULL << shiftAmount; } + if (m_decodeSurrogatePairs) + op.m_jumps.append(jumpIfNoAvailableInput()); + if (m_charSize == Char8) { + auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) { + op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character)); + }; + + auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) { + load16Unaligned(negativeOffsetIndexedAddress(offset, character), character); + if (mask) + or32(Imm32(mask), character); + op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); + }; + + auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) { + if (mask) { + load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character); + if (mask) + or32(Imm32(mask), character); + op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); + return; + } + op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters))); + }; + +#if CPU(X86_64) || CPU(ARM64) + auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) { + load64(negativeOffsetIndexedAddress(offset, character), character); + if (mask) + or64(TrustedImm64(mask), character); + op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask))); + }; +#endif + switch (numberCharacters) { case 1: - op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character)); + // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag. 
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff); return; case 2: { - load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); - break; + check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff); + return; } case 3: { - load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); - if (ignoreCaseMask) - or32(Imm32(ignoreCaseMask), character); - op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); - op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character)); + check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff); + check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff); return; } case 4: { - load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character); - break; + check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + return; + } +#if CPU(X86_64) || CPU(ARM64) + case 5: { + check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff); + return; + } + case 6: { + check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff); + return; + } + case 7: { + check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff); + check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff); + return; + } + case 8: { + 
check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask); + return; } +#endif } } else { + auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) { + op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character)); + }; + + auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) { + if (mask) { + load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character); + if (mask) + or32(Imm32(mask), character); + op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); + return; + } + op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters))); + }; + +#if CPU(X86_64) || CPU(ARM64) + auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) { + load64(negativeOffsetIndexedAddress(offset, character), character); + if (mask) + or64(TrustedImm64(mask), character); + op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask))); + }; +#endif + switch (numberCharacters) { case 1: - op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character)); + // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag. 
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff); return; case 2: - load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character); - break; + check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + return; +#if CPU(X86_64) || CPU(ARM64) + case 3: + check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); + check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff); + return; + case 4: + check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask); + return; +#endif } } - - if (ignoreCaseMask) - or32(Imm32(ignoreCaseMask), character); - op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); - return; } void backtrackPatternCharacterOnce(size_t opIndex) { @@ -1250,6 +1575,9 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; const RegisterID countRegister = regT1; + if (m_decodeSurrogatePairs) + op.m_jumps.append(jumpIfNoAvailableInput()); + move(index, countRegister); Checked<unsigned> scaledMaxCount = term->quantityMaxCount; scaledMaxCount *= U_IS_BMP(ch) ? 
1 : 2; @@ -1403,8 +1731,10 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; - if (m_decodeSurrogatePairs) + if (m_decodeSurrogatePairs) { + op.m_jumps.append(jumpIfNoAvailableInput()); storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex()); + } JumpList matchDest; readCharacter(m_checkedOffset - term->inputPosition, character); @@ -1451,6 +1781,9 @@ class YarrGenerator : private DefaultMacroAssembler { const RegisterID character = regT0; const RegisterID countRegister = regT1; + if (m_decodeSurrogatePairs) + op.m_jumps.append(jumpIfNoAvailableInput()); + move(index, countRegister); sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister); @@ -1780,13 +2113,19 @@ class YarrGenerator : private DefaultMacroAssembler { break; case PatternTerm::TypeForwardReference: + m_failureReason = JITFailureReason::ForwardReference; break; case PatternTerm::TypeParenthesesSubpattern: case PatternTerm::TypeParentheticalAssertion: RELEASE_ASSERT_NOT_REACHED(); + case PatternTerm::TypeBackReference: +#if ENABLE(YARR_JIT_BACKREFERENCES) + generateBackReference(opIndex); +#else m_failureReason = JITFailureReason::BackReference; +#endif break; case PatternTerm::TypeDotStarEnclosure: generateDotStarEnclosure(opIndex); @@ -1846,18 +2185,23 @@ class YarrGenerator : private DefaultMacroAssembler { break; case PatternTerm::TypeForwardReference: + m_failureReason = JITFailureReason::ForwardReference; break; case PatternTerm::TypeParenthesesSubpattern: case PatternTerm::TypeParentheticalAssertion: RELEASE_ASSERT_NOT_REACHED(); - case PatternTerm::TypeDotStarEnclosure: - backtrackDotStarEnclosure(opIndex); - break; - case PatternTerm::TypeBackReference: +#if ENABLE(YARR_JIT_BACKREFERENCES) + backtrackBackReference(opIndex); +#else m_failureReason = JITFailureReason::BackReference; +#endif + break; + + case PatternTerm::TypeDotStarEnclosure: + backtrackDotStarEnclosure(opIndex); break; } } @@ -2157,7 +2501,7 @@ class 
YarrGenerator : private DefaultMacroAssembler { } // If the parentheses are quantified Greedy then add a label to jump back - // to if get a failed match from after the parentheses. For NonGreedy + // to if we get a failed match from after the parentheses. For NonGreedy // parentheses, link the jump from before the subpattern to here. if (term->quantityType == QuantifierGreedy) op.m_reentry = label(); @@ -2221,11 +2565,11 @@ class YarrGenerator : private DefaultMacroAssembler { // match within the parentheses, or the second having skipped over them. // - To check for empty matches, which must be rejected. // - // At the head of a NonGreedy set of parentheses we'll immediately set the - // value on the stack to -1 (indicating a match skipping the subpattern), + // At the head of a NonGreedy set of parentheses we'll immediately set 'begin' + // in the backtrack info to -1 (indicating a match skipping the subpattern), // and plant a jump to the end. We'll also plant a label to backtrack to - // to reenter the subpattern later, with a store to set up index on the - // second iteration. + // to reenter the subpattern later, with a store to set 'begin' to current index + // on the second iteration. // // FIXME: for capturing parens, could use the index in the capture array? if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) { @@ -2312,7 +2656,7 @@ class YarrGenerator : private DefaultMacroAssembler { } // If the parentheses are quantified Greedy then add a label to jump back - // to if get a failed match from after the parentheses. For NonGreedy + // to if we get a failed match from after the parentheses. For NonGreedy // parentheses, link the jump from before the subpattern to here. 
if (term->quantityType == QuantifierGreedy) { if (term->quantityMaxCount != quantifyInfinite) @@ -2324,6 +2668,7 @@ class YarrGenerator : private DefaultMacroAssembler { } else if (term->quantityType == QuantifierNonGreedy) { YarrOp& beginOp = m_ops[op.m_previousOp]; beginOp.m_jumps.link(this); + op.m_reentry = label(); } #else // !YARR_JIT_ALL_PARENS_EXPRESSIONS RELEASE_ASSERT_NOT_REACHED(); @@ -2385,6 +2730,7 @@ class YarrGenerator : private DefaultMacroAssembler { do { --opIndex; + YarrOp& op = m_ops[opIndex]; switch (op.m_op) { @@ -2881,32 +3227,32 @@ class YarrGenerator : private DefaultMacroAssembler { if (term->quantityType != QuantifierFixedCount) { m_backtrackingState.link(this); - if (term->quantityType == QuantifierGreedy) { - RegisterID currParenContextReg = regT0; - RegisterID newParenContextReg = regT1; + RegisterID currParenContextReg = regT0; + RegisterID newParenContextReg = regT1; - loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg); + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg); - restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation); + restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation); - freeParenContext(currParenContextReg, newParenContextReg); - storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex()); - const RegisterID countTemporary = regT0; - loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary); - Jump zeroLengthMatch = branchTest32(Zero, countTemporary); + freeParenContext(currParenContextReg, newParenContextReg); + storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex()); - 
sub32(TrustedImm32(1), countTemporary); - storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); + const RegisterID countTemporary = regT0; + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary); + Jump zeroLengthMatch = branchTest32(Zero, countTemporary); - jump(m_ops[op.m_nextOp].m_reentry); + sub32(TrustedImm32(1), countTemporary); + storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); - zeroLengthMatch.link(this); + jump(m_ops[op.m_nextOp].m_reentry); - // Clear the flag in the stackframe indicating we didn't run through the subpattern. - storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()); + zeroLengthMatch.link(this); + // Clear the flag in the stackframe indicating we didn't run through the subpattern. + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()); + + if (term->quantityType == QuantifierGreedy) jump(m_ops[op.m_nextOp].m_reentry); - } // If Greedy, jump to the end. if (term->quantityType == QuantifierGreedy) { @@ -2929,13 +3275,14 @@ class YarrGenerator : private DefaultMacroAssembler { if (term->quantityType != QuantifierFixedCount) { m_backtrackingState.link(this); - // Check whether we should backtrack back into the parentheses, or if we - // are currently in a state where we had skipped over the subpattern - // (in which case the flag value on the stack will be -1). 
unsigned parenthesesFrameLocation = term->frameLocation; - Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1)); if (term->quantityType == QuantifierGreedy) { + // Check whether we should backtrack back into the parentheses, or if we + // are currently in a state where we had skipped over the subpattern + // (in which case the flag value on the stack will be -1). + Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1)); + // For Greedy parentheses, we skip after having already tried going // through the subpattern, so if we get here we're done. YarrOp& beginOp = m_ops[op.m_previousOp]; @@ -2946,8 +3293,25 @@ class YarrGenerator : private DefaultMacroAssembler { // next. Jump back to the start of the parentheses in the forwards // matching path. ASSERT(term->quantityType == QuantifierNonGreedy); + + const RegisterID beginTemporary = regT0; + const RegisterID countTemporary = regT1; + YarrOp& beginOp = m_ops[op.m_previousOp]; - hadSkipped.linkTo(beginOp.m_reentry, this); + + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary); + branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this); + + JumpList exceededMatchLimit; + + if (term->quantityMaxCount != quantifyInfinite) { + loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary); + exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet()))); + } + + branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this); + + exceededMatchLimit.link(this); } m_backtrackingState.fallthrough(); @@ -3021,7 +3385,7 @@ class YarrGenerator : private DefaultMacroAssembler { // the parentheses. 
// Supported types of parentheses are 'Once' (quantityMaxCount == 1), // 'Terminal' (non-capturing parentheses quantified as greedy - // and infinite), and 0 based greedy quantified parentheses. + // and infinite), and 0 based greedy / non-greedy quantified parentheses. // Alternatives will use the 'Simple' set of ops if either the // subpattern is terminal (in which case we will never need to // backtrack), or if the subpattern only contains one alternative. @@ -3043,7 +3407,9 @@ class YarrGenerator : private DefaultMacroAssembler { if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) { m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum; return; - } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) { + } + + if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) { // Select the 'Once' nodes. parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; @@ -3060,10 +3426,10 @@ class YarrGenerator : private DefaultMacroAssembler { parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; } else { #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) - // We only handle generic parenthesis with greedy counts. - if (term->quantityType != QuantifierGreedy) { + // We only handle generic parenthesis with non-fixed counts. + if (term->quantityType == QuantifierFixedCount) { // This subpattern is not supported by the JIT. - m_failureReason = JITFailureReason::NonGreedyParenthesizedSubpattern; + m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern; return; } @@ -3369,7 +3735,7 @@ class YarrGenerator : private DefaultMacroAssembler { // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. 
zeroExtend32ToPtr(index, index); zeroExtend32ToPtr(length, length); -#elif CPU(ARM) +#elif CPU(ARM_THUMB2) push(ARMRegisters::r4); push(ARMRegisters::r5); push(ARMRegisters::r6); @@ -3422,7 +3788,7 @@ class YarrGenerator : private DefaultMacroAssembler { #elif CPU(ARM64) if (m_decodeSurrogatePairs) popPair(framePointerRegister, linkRegister); -#elif CPU(ARM) +#elif CPU(ARM_THUMB2) pop(ARMRegisters::r8); pop(ARMRegisters::r6); pop(ARMRegisters::r5); @@ -3460,10 +3826,14 @@ public: } #endif -#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) - if (m_containsNestedSubpatterns) - codeBlock.setUsesPaternContextBuffer(); + if (m_pattern.m_containsBackreferences +#if ENABLE(YARR_JIT_BACKREFERENCES) + && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8)) #endif + ) { + codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference); + return; + } // We need to compile before generating code since we set flags based on compilation that // are used during generation. @@ -3473,7 +3843,12 @@ public: codeBlock.setFallBackWithFailureReason(*m_failureReason); return; } - + +#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) + if (m_containsNestedSubpatterns) + codeBlock.setUsesPatternContextBuffer(); +#endif + generateEnter(); Jump hasInput = checkInput(); @@ -3618,7 +3993,10 @@ static void dumpCompileFailure(JITFailureReason failure) dataLog("Can't JIT a pattern decoding surrogate pairs\n"); break; case JITFailureReason::BackReference: - dataLog("Can't JIT a pattern containing back references\n"); + dataLog("Can't JIT some patterns containing back references\n"); + break; + case JITFailureReason::ForwardReference: + dataLog("Can't JIT a pattern containing forward references\n"); break; case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum: dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n"); @@ -3626,8 +4004,8 @@ static void dumpCompileFailure(JITFailureReason failure) case 
JITFailureReason::ParenthesizedSubpattern: dataLog("Can't JIT a pattern containing parenthesized subpatterns\n"); break; - case JITFailureReason::NonGreedyParenthesizedSubpattern: - dataLog("Can't JIT a pattern containing non-greedy parenthesized subpatterns\n"); + case JITFailureReason::FixedCountParenthesizedSubpattern: + dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n"); break; case JITFailureReason::ExecutableMemoryAllocationFailure: dataLog("Can't JIT because of failure of allocation of executable memory\n"); diff --git a/src/3rdparty/masm/yarr/YarrJIT.h b/src/3rdparty/masm/yarr/YarrJIT.h index 35a0690f6e..c6410d3c44 100644 --- a/src/3rdparty/masm/yarr/YarrJIT.h +++ b/src/3rdparty/masm/yarr/YarrJIT.h @@ -54,9 +54,10 @@ namespace Yarr { enum class JITFailureReason : uint8_t { DecodeSurrogatePair, BackReference, + ForwardReference, VariableCountedParenthesisWithNonZeroMinimum, ParenthesizedSubpattern, - NonGreedyParenthesizedSubpattern, + FixedCountParenthesizedSubpattern, ExecutableMemoryAllocationFailure, }; @@ -107,7 +108,7 @@ public: #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) bool usesPatternContextBuffer() { return m_usesPatternContextBuffer; } - void setUsesPaternContextBuffer() { m_usesPatternContextBuffer = true; } + void setUsesPatternContextBuffer() { m_usesPatternContextBuffer = true; } MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) { diff --git a/src/3rdparty/masm/yarr/YarrParser.h b/src/3rdparty/masm/yarr/YarrParser.h index edc6beb1f0..a18b553ef0 100644 --- a/src/3rdparty/masm/yarr/YarrParser.h +++ b/src/3rdparty/masm/yarr/YarrParser.h @@ -194,7 +194,9 @@ private: // invoked with inCharacterClass set. 
NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); } NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); } - NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(String) { RELEASE_ASSERT_NOT_REACHED(); } + NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); } + bool isValidNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); return false; } + NO_RETURN_DUE_TO_ASSERT void atomNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); } private: Delegate& m_delegate; @@ -421,9 +423,16 @@ private: if (!atEndOfPattern() && !inCharacterClass) { if (consume() == '<') { auto groupName = tryConsumeGroupName(); - if (groupName && m_captureGroupNames.contains(groupName.value())) { - delegate.atomNamedBackReference(groupName.value()); - break; + if (groupName) { + if (m_captureGroupNames.contains(groupName.value())) { + delegate.atomNamedBackReference(groupName.value()); + break; + } + + if (delegate.isValidNamedForwardReference(groupName.value())) { + delegate.atomNamedForwardReference(groupName.value()); + break; + } } if (m_isUnicode) { m_errorCode = ErrorCode::InvalidBackreference; @@ -1133,11 +1142,13 @@ private: * void atomCharacterClassRange(UChar32 begin, UChar32 end) * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) * void atomCharacterClassEnd() - * void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> groupName); + * void atomParenthesesSubpatternBegin(bool capture = true, Optional<String> groupName); * void atomParentheticalAssertionBegin(bool invert = false); * void atomParenthesesEnd(); * void atomBackReference(unsigned subpatternId); - * void atomNamedBackReference(String subpatternName); + * void atomNamedBackReference(const String& subpatternName); + * bool isValidNamedForwardReference(const String& subpatternName); + * void atomNamedForwardReference(const 
String& subpatternName); * * void quantifyAtom(unsigned min, unsigned max, bool greedy); * diff --git a/src/3rdparty/masm/yarr/YarrPattern.cpp b/src/3rdparty/masm/yarr/YarrPattern.cpp index ac66ea1b9a..9c1cdadf3f 100644 --- a/src/3rdparty/masm/yarr/YarrPattern.cpp +++ b/src/3rdparty/masm/yarr/YarrPattern.cpp @@ -33,12 +33,9 @@ #include "YarrParser.h" #include <wtf/DataLog.h> #include <wtf/Optional.h> -//#include <wtf/Threading.h> #include <wtf/Vector.h> #include <wtf/text/WTFString.h> -using namespace WTF; - namespace JSC { namespace Yarr { #include "RegExpJitTables.h" @@ -334,7 +331,7 @@ private: ranges.insert(i, CharacterRange(lo, hi)); return; } - // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining + // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the beginning // If the new range start at or before the end of the last range, then the overlap (if it starts one after the // end of the last range they concatenate, which is just as good. 
if (lo <= (ranges[i].end + 1)) { @@ -446,9 +443,9 @@ public: { } - void reset() + void resetForReparsing() { - m_pattern.reset(); + m_pattern.resetForReparsing(); m_characterClassConstructor.reset(); auto body = std::make_unique<PatternDisjunction>(); @@ -456,7 +453,17 @@ public: m_alternative = body->addNewAlternative(); m_pattern.m_disjunctions.append(WTFMove(body)); } - + + void saveUnmatchedNamedForwardReferences() + { + m_unmatchedNamedForwardReferences.shrink(0); + + for (auto& entry : m_pattern.m_namedForwardReferences) { + if (!m_pattern.m_captureGroupNames.contains(entry)) + m_unmatchedNamedForwardReferences.append(entry); + } + } + void assertionBOL() { if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) { @@ -666,12 +673,24 @@ public: m_alternative->m_terms.append(PatternTerm(subpatternId)); } - void atomNamedBackReference(String subpatternName) + void atomNamedBackReference(const String& subpatternName) { ASSERT(m_pattern.m_namedGroupToParenIndex.find(subpatternName) != m_pattern.m_namedGroupToParenIndex.end()); atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName)); } + bool isValidNamedForwardReference(const String& subpatternName) + { + return !m_unmatchedNamedForwardReferences.contains(subpatternName); + } + + void atomNamedForwardReference(const String& subpatternName) + { + if (!m_pattern.m_namedForwardReferences.contains(subpatternName)) + m_pattern.m_namedForwardReferences.append(subpatternName); + m_alternative->m_terms.append(PatternTerm::ForwardReference()); + } + // deep copy the argument disjunction. If filterStartsWithBOL is true, // skip alternatives with m_startsWithBOL set true. 
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false) @@ -1079,6 +1098,7 @@ private: YarrPattern& m_pattern; PatternAlternative* m_alternative; CharacterClassConstructor m_characterClassConstructor; + Vector<String> m_unmatchedNamedForwardReferences; void* m_stackLimit; bool m_invertCharacterClass; bool m_invertParentheticalAssertion { false }; @@ -1101,13 +1121,14 @@ ErrorCode YarrPattern::compile(const String& patternString, void* stackLimit) // Quoting Netscape's "What's new in JavaScript 1.2", // "Note: if the number of left parentheses is less than the number specified // in \#, the \# is taken as an octal escape as described in the next row." - if (containsIllegalBackReference()) { + if (containsIllegalBackReference() || containsIllegalNamedForwardReferences()) { if (unicode()) return ErrorCode::InvalidBackreference; unsigned numSubpatterns = m_numSubpatterns; - constructor.reset(); + constructor.saveUnmatchedNamedForwardReferences(); + constructor.resetForReparsing(); ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns); ASSERT_UNUSED(error, !hasError(error)); ASSERT(numSubpatterns == m_numSubpatterns); @@ -1168,7 +1189,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* else if (characterClass == pattern->wordcharCharacterClass()) out.print("<word>"); else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass()) - out.print("<unicode ignore case>"); + out.print("<unicode word ignore case>"); else if (characterClass == pattern->nondigitsCharacterClass()) out.print("<non-digits>"); else if (characterClass == pattern->nonspacesCharacterClass()) @@ -1176,7 +1197,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* else if (characterClass == pattern->nonwordcharCharacterClass()) out.print("<non-word>"); else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass()) - out.print("<unicode 
non-ignore case>"); + out.print("<unicode non-word ignore case>"); else { bool needMatchesRangesSeperator = false; @@ -1298,75 +1319,7 @@ void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nest break; case TypeCharacterClass: out.print("character class "); - if (characterClass->m_anyCharacter) - out.print("<any character>"); - else if (characterClass == thisPattern->newlineCharacterClass()) - out.print("<newline>"); - else if (characterClass == thisPattern->digitsCharacterClass()) - out.print("<digits>"); - else if (characterClass == thisPattern->spacesCharacterClass()) - out.print("<whitespace>"); - else if (characterClass == thisPattern->wordcharCharacterClass()) - out.print("<word>"); - else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass()) - out.print("<unicode ignore case>"); - else if (characterClass == thisPattern->nondigitsCharacterClass()) - out.print("<non-digits>"); - else if (characterClass == thisPattern->nonspacesCharacterClass()) - out.print("<non-whitespace>"); - else if (characterClass == thisPattern->nonwordcharCharacterClass()) - out.print("<non-word>"); - else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass()) - out.print("<unicode non-ignore case>"); - else { - bool needMatchesRangesSeperator = false; - - auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) { - size_t matchesSize = matches.size(); - if (matchesSize) { - if (needMatchesRangesSeperator) - out.print(","); - needMatchesRangesSeperator = true; - - out.print(prefix, ":("); - for (size_t i = 0; i < matchesSize; ++i) { - if (i) - out.print(","); - dumpUChar32(out, matches[i]); - } - out.print(")"); - } - }; - - auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) { - size_t rangeSize = ranges.size(); - if (rangeSize) { - if (needMatchesRangesSeperator) - out.print(","); - needMatchesRangesSeperator = true; - - out.print(prefix, " ranges:("); - for (size_t i = 0; i < 
rangeSize; ++i) { - if (i) - out.print(","); - CharacterRange range = ranges[i]; - out.print("("); - dumpUChar32(out, range.begin); - out.print(".."); - dumpUChar32(out, range.end); - out.print(")"); - } - out.print(")"); - } - }; - - out.print("["); - dumpMatches("ASCII", characterClass->m_matches); - dumpRanges("ASCII", characterClass->m_ranges); - dumpMatches("Unicode", characterClass->m_matchesUnicode); - dumpRanges("Unicode", characterClass->m_rangesUnicode); - out.print("]"); - } + dumpCharacterClass(out, thisPattern, characterClass); dumpQuantifier(out); if (quantityType != QuantifierFixedCount || thisPattern->unicode()) out.print(",frame location ", frameLocation); @@ -1439,16 +1392,10 @@ void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsign } } -void YarrPattern::dumpPattern(const String& patternString) +void YarrPattern::dumpPatternString(PrintStream& out, const String& patternString) { - dumpPattern(WTF::dataFile(), patternString); -} + out.print("/", patternString, "/"); -void YarrPattern::dumpPattern(PrintStream& out, const String& patternString) -{ - out.print("RegExp pattern for /"); - out.print(patternString); - out.print("/"); if (global()) out.print("g"); if (ignoreCase()) @@ -1459,6 +1406,18 @@ void YarrPattern::dumpPattern(PrintStream& out, const String& patternString) out.print("u"); if (sticky()) out.print("y"); +} + +void YarrPattern::dumpPattern(const String& patternString) +{ + dumpPattern(WTF::dataFile(), patternString); +} + +void YarrPattern::dumpPattern(PrintStream& out, const String& patternString) +{ + out.print("RegExp pattern for "); + dumpPatternString(out, patternString); + if (m_flags != NoFlags) { bool printSeperator = false; out.print(" ("); diff --git a/src/3rdparty/masm/yarr/YarrPattern.h b/src/3rdparty/masm/yarr/YarrPattern.h index f7ddf861ba..10ea2c5b94 100644 --- a/src/3rdparty/masm/yarr/YarrPattern.h +++ b/src/3rdparty/masm/yarr/YarrPattern.h @@ -355,7 +355,7 @@ struct TermChain { struct 
YarrPattern { JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, ErrorCode&, void* stackLimit = nullptr); - void reset() + void resetForReparsing() { m_numSubpatterns = 0; m_maxBackReference = 0; @@ -382,6 +382,7 @@ struct YarrPattern { m_disjunctions.clear(); m_userCharacterClasses.clear(); m_captureGroupNames.shrink(0); + m_namedForwardReferences.shrink(0); } bool containsIllegalBackReference() @@ -389,6 +390,19 @@ struct YarrPattern { return m_maxBackReference > m_numSubpatterns; } + bool containsIllegalNamedForwardReferences() + { + if (m_namedForwardReferences.isEmpty()) + return false; + + for (auto& entry : m_namedForwardReferences) { + if (m_captureGroupNames.contains(entry)) + return true; + } + + return false; + } + bool containsUnsignedLengthPattern() { return m_containsUnsignedLengthPattern; @@ -490,6 +504,7 @@ struct YarrPattern { return unicodePropertiesCached.get(classID); } + void dumpPatternString(PrintStream& out, const String& patternString); void dumpPattern(const String& pattern); void dumpPattern(PrintStream& out, const String& pattern); @@ -513,6 +528,7 @@ struct YarrPattern { Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions; Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; Vector<String> m_captureGroupNames; + Vector<String> m_namedForwardReferences; HashMap<String, unsigned> m_namedGroupToParenIndex; private: @@ -555,8 +571,8 @@ private: uintptr_t begin; // Not really needed for greedy quantifiers. uintptr_t matchAmount; // Not really needed for fixed quantifiers. 
- unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); } - unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); } + static unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); } + static unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); } }; struct BackTrackInfoAlternative { diff --git a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp index 9f05f22852..358cc94d6b 100644 --- a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp +++ b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp @@ -48,7 +48,9 @@ public: void atomParentheticalAssertionBegin(bool = false) {} void atomParenthesesEnd() {} void atomBackReference(unsigned) {} - void atomNamedBackReference(String) {} + void atomNamedBackReference(const String&) {} + bool isValidNamedForwardReference(const String&) { return true; } + void atomNamedForwardReference(const String&) {} void quantifyAtom(unsigned, unsigned, bool) {} void disjunction() {} }; diff --git a/src/3rdparty/masm/yarr/create_regex_tables b/src/3rdparty/masm/yarr/create_regex_tables index 4c3dbbe3fb..992566db77 100644 --- a/src/3rdparty/masm/yarr/create_regex_tables +++ b/src/3rdparty/masm/yarr/create_regex_tables @@ -32,7 +32,7 @@ types = { "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]}, "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordUnicodeIgnoreCaseChar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x017e), (0x0180, 0x2129), (0x212b, 0x10ffff)]}, "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]}, - "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 
0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]}, + "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]}, "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0x10ffff)]}, "digits": { "UseTable" : False, "data": [('0', '9')]}, "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)] } diff --git a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode index a103bcdf16..95549c7eb5 100644 --- a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode +++ b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode @@ -31,7 +31,6 @@ import optparse import os import re import sys -from sets import Set header = """/* * Copyright (C) 2016 Apple Inc. All rights reserved. 
@@ -78,9 +77,12 @@ def openOrExit(path, mode): dirname = os.path.dirname(path) if not os.path.isdir(dirname): os.makedirs(dirname) - return open(path, mode) + if sys.version_info.major >= 3: + return open(path, mode, encoding="UTF-8") + else: + return open(path, mode) except IOError as e: - print "I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror) + print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror)) exit(1) class Canonicalize: @@ -93,7 +95,7 @@ class Canonicalize: self.canonicalGroups[mapping].append(code) def readCaseFolding(self, file): - codesSeen = Set() + codesSeen = set() for line in file: line = line.split('#', 1)[0] line = line.rstrip() @@ -154,8 +156,8 @@ class Canonicalize: for i in range(len(characterSets)): characters = "" - set = characterSets[i] - for ch in set: + cur_set = characterSets[i] + for ch in cur_set: characters = characters + "0x{character:04x}, ".format(character=ch) file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=i, characters=characters)) @@ -189,7 +191,7 @@ if __name__ == "__main__": caseFoldingTxtPath = args[0] canonicalizeHPath = args[1] caseFoldingTxtFile = openOrExit(caseFoldingTxtPath, "r") - canonicalizeHFile = openOrExit(canonicalizeHPath, "wb") + canonicalizeHFile = openOrExit(canonicalizeHPath, "w") canonicalize = Canonicalize() canonicalize.readCaseFolding(caseFoldingTxtFile) |