summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2/src/sljit
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit')
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c133
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c330
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c89
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c62
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c40
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c72
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c172
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c141
-rw-r--r--src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c102
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitConfig.h26
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitConfigCPU.h188
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h338
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitLir.c1628
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitLir.h936
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c1941
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c1283
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c1303
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c172
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c78
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c1177
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c153
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c152
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c753
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_32.c71
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_64.c43
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_common.c603
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c1246
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c465
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c446
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c3117
30 files changed, 13273 insertions, 3987 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c
new file mode 100644
index 0000000000..95b9842fa9
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c
@@ -0,0 +1,133 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+/*
+ On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a
+ version where it's OK to have more than one JIT block or where MAP_JIT is
+ required.
+ On non-macOS systems, returns MAP_JIT if it is defined.
+*/
+#include <TargetConditionals.h>
+
+#if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (TARGET_OS_MAC && !TARGET_OS_IPHONE)
+
+#if defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86
+
+#include <sys/utsname.h>
+#include <stdlib.h>
+
+#define SLJIT_MAP_JIT (get_map_jit_flag())
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+static SLJIT_INLINE int get_map_jit_flag(void)
+{ /* Returns the mmap() flag to add for JIT memory (MAP_JIT or 0); the probe runs once and its result is cached. */
+    size_t page_size;
+    void *ptr;
+    struct utsname name;
+    static int map_jit_flag = -1; /* negative until the first call has probed the system */
+
+    if (map_jit_flag < 0) {
+        map_jit_flag = 0;
+
+        /* Kernel version for 10.14.0 (Mojave) or later. If uname() fails
+           name.release is not filled in, so MAP_JIT stays disabled. */
+        if (uname(&name) == 0 && atoi(name.release) >= 18) {
+            page_size = get_page_alignment() + 1;
+            /* Only use MAP_JIT if a hardened runtime is used */
+            ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC,
+                MAP_PRIVATE | MAP_ANON, -1, 0);
+
+            if (ptr != MAP_FAILED)
+                munmap(ptr, page_size); /* a plain writable+executable mapping works: no MAP_JIT */
+            else
+                map_jit_flag = MAP_JIT; /* the plain mapping was refused: request MAP_JIT from now on */
+        }
+    }
+    return map_jit_flag;
+}
+
+#elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM
+
+#include <AvailabilityMacros.h>
+#include <pthread.h>
+
+#define SLJIT_MAP_JIT (MAP_JIT)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
+ apple_update_wx_flags(enable_exec)
+
+static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
+{ /* Passes enable_exec straight to pthread_jit_write_protect_np(). */
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 110000
+ if (__builtin_available(macos 11, *)) /* older deployment targets: only make the call when running on macOS 11 or newer */
+#endif /* BigSur */
+ pthread_jit_write_protect_np(enable_exec);
+}
+
+#elif defined(SLJIT_CONFIG_PPC) && SLJIT_CONFIG_PPC
+
+#define SLJIT_MAP_JIT (0)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+#else
+#error "Unsupported architecture"
+#endif /* SLJIT_CONFIG */
+
+#else /* !TARGET_OS_OSX */
+
+#ifdef MAP_JIT
+#define SLJIT_MAP_JIT (MAP_JIT)
+#else
+#define SLJIT_MAP_JIT (0)
+#endif
+
+#endif /* TARGET_OS_OSX */
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{ /* Maps size bytes of anonymous private memory with read/write/execute protection; returns NULL when mmap() fails. */
+ void *retval;
+ int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+ int flags = MAP_PRIVATE;
+ int fd = -1;
+
+ flags |= MAP_ANON | SLJIT_MAP_JIT; /* SLJIT_MAP_JIT is MAP_JIT or 0, as selected by the platform section of this file */
+
+ retval = mmap(NULL, size, prot, flags, fd, 0);
+ if (retval == MAP_FAILED)
+ return NULL;
+
+ SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); /* enable_exec = 0 for the new range */
+
+ return retval;
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Unmaps the size bytes starting at chunk; the munmap() result is not checked. */
+ munmap(chunk, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c
new file mode 100644
index 0000000000..6cd391104c
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c
@@ -0,0 +1,330 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ This file contains a simple executable memory allocator
+
+ It is assumed, that executable code blocks are usually medium (or sometimes
+ large) memory blocks, and the allocator is not too frequently called (less
+ optimized than other allocators). Thus, using it as a generic allocator is
+ not suggested.
+
+ How does it work:
+ Memory is allocated in contiguous memory areas called chunks by alloc_chunk().
+ Chunk format:
+ [ block ][ block ] ... [ block ][ block terminator ]
+
+ All blocks and the block terminator start with a block_header. The block
+ header contains the size of the previous block and the size of the block it
+ starts. These sizes can also contain special values.
+ Block size:
+ 0 - The block is a free_block, with a different size member.
+ 1 - The block is a block terminator.
+ n - The block is used at the moment, and the value contains its size.
+ Previous block size:
+ 0 - This is the first block of the memory chunk.
+ n - The size of the previous block.
+
+ Using these size values we can go forward or backward on the block chain.
+ The unused blocks are stored in a chain list pointed by free_blocks. This
+ list is useful if we need to find a suitable memory area when the allocator
+ is called.
+
+ When a block is freed, the new free block is connected to its adjacent free
+ blocks if possible.
+
+ [ free block ][ used block ][ free block ]
+ and "used block" is freed, the three blocks are connected together:
+ [ one big free block ]
+*/
+
+/* Expected functions:
+ alloc_chunk / free_chunk :
+ * allocate executable system memory chunks
+ * the size is always divisible by CHUNK_SIZE
+ SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK :
+ * provided as part of sljitUtils
+ * only the allocator requires this lock, sljit is fully thread safe
+ as it only uses local variables
+
+ Supported defines:
+ SLJIT_HAS_CHUNK_HEADER - (optional) sljit_chunk_header is defined
+ SLJIT_HAS_EXECUTABLE_OFFSET - (optional) has executable offset data
+ SLJIT_UPDATE_WX_FLAGS - (optional) update WX flags
+*/
+
+#ifdef SLJIT_HAS_CHUNK_HEADER
+#define CHUNK_HEADER_SIZE (sizeof(struct sljit_chunk_header))
+#else /* !SLJIT_HAS_CHUNK_HEADER */
+#define CHUNK_HEADER_SIZE 0
+#endif /* SLJIT_HAS_CHUNK_HEADER */
+
+#ifndef SLJIT_UPDATE_WX_FLAGS
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+#endif /* SLJIT_UPDATE_WX_FLAGS */
+
+#ifndef CHUNK_SIZE
+/* 64 KByte if not specified. */
+#define CHUNK_SIZE (sljit_uw)0x10000
+#endif /* CHUNK_SIZE */
+
+struct block_header {
+ sljit_uw size; /* 0: free block, 1: block terminator, otherwise the size of this used block */
+ sljit_uw prev_size; /* 0: first block of the chunk, otherwise the size of the previous block */
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+ sljit_sw executable_offset; /* chunk_header->executable minus the chunk_header address, copied into every header of the chunk */
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+};
+
+struct free_block {
+ struct block_header header; /* header.size is set to 0 while the block is free */
+ struct free_block *next; /* next entry of the list rooted at free_blocks, or NULL */
+ struct free_block *prev; /* previous entry, NULL for the list head */
+ sljit_uw size; /* real size of the free block, since header.size holds the 0 marker */
+};
+
+#define AS_BLOCK_HEADER(base, offset) \
+ ((struct block_header*)(((sljit_u8*)base) + offset)) /* the block_header located offset bytes away from base */
+#define AS_FREE_BLOCK(base, offset) \
+ ((struct free_block*)(((sljit_u8*)base) + offset)) /* same byte-offset addressing, viewed as a free_block */
+#define MEM_START(base) ((void*)((base) + 1)) /* first address after the object base points to */
+#define CHUNK_MASK (~(CHUNK_SIZE - 1)) /* clears the bits below CHUNK_SIZE; used to round chunk sizes */
+#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) /* adds the header size, then rounds up to a multiple of 8 */
+#define CHUNK_EXTRA_SIZE (sizeof(struct block_header) + CHUNK_HEADER_SIZE) /* bytes of a chunk not counted as block space: one block_header plus the optional chunk header */
+
+static struct free_block* free_blocks; /* head of the doubly linked list of free blocks */
+static sljit_uw allocated_size; /* sum of the sizes of the blocks currently handed out */
+static sljit_uw total_size; /* sum over all live chunks of (chunk size - CHUNK_EXTRA_SIZE) */
+
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{ /* Marks free_block as free with the given size and pushes it onto the front of the free_blocks list. */
+ free_block->header.size = 0; /* 0 in the header size field identifies a free block */
+ free_block->size = size;
+
+ free_block->next = free_blocks;
+ free_block->prev = NULL;
+ if (free_blocks)
+ free_blocks->prev = free_block;
+ free_blocks = free_block;
+}
+
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{ /* Unlinks free_block from the doubly linked free_blocks list. */
+ if (free_block->next)
+ free_block->next->prev = free_block->prev;
+
+ if (free_block->prev)
+ free_block->prev->next = free_block->next;
+ else { /* no predecessor: free_block is the list head */
+ SLJIT_ASSERT(free_blocks == free_block);
+ free_blocks = free_block->next;
+ }
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{ /* Returns a block of at least size bytes taken from the free list or from a new chunk, or NULL when alloc_chunk() fails. */
+    struct block_header *header;
+    struct block_header *next_header;
+    struct free_block *free_block;
+    sljit_uw chunk_size;
+
+#ifdef SLJIT_HAS_CHUNK_HEADER
+    struct sljit_chunk_header *chunk_header;
+#else /* !SLJIT_HAS_CHUNK_HEADER */
+    void *chunk_header;
+#endif /* SLJIT_HAS_CHUNK_HEADER */
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+    sljit_sw executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+    if (size < (64 - sizeof(struct block_header))) /* a block, header included, is never smaller than 64 bytes */
+        size = (64 - sizeof(struct block_header));
+    size = ALIGN_SIZE(size); /* from here on size includes the block header */
+
+    SLJIT_ALLOCATOR_LOCK();
+    free_block = free_blocks;
+    while (free_block) { /* first fit search through the free list */
+        if (free_block->size >= size) {
+            chunk_size = free_block->size;
+            SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+            if (chunk_size > size + 64) {
+                /* We just cut a block from the end of the free block. */
+                chunk_size -= size;
+                free_block->size = chunk_size;
+                header = AS_BLOCK_HEADER(free_block, chunk_size);
+                header->prev_size = chunk_size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+                header->executable_offset = free_block->header.executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+                AS_BLOCK_HEADER(header, size)->prev_size = size;
+            }
+            else { /* the remainder would be too small: hand out the whole free block */
+                sljit_remove_free_block(free_block);
+                header = (struct block_header*)free_block;
+                size = chunk_size;
+            }
+            allocated_size += size;
+            header->size = size;
+            SLJIT_ALLOCATOR_UNLOCK();
+            return MEM_START(header);
+        }
+        free_block = free_block->next;
+    }
+
+    chunk_size = (size + CHUNK_EXTRA_SIZE + CHUNK_SIZE - 1) & CHUNK_MASK; /* round the request up to a multiple of CHUNK_SIZE */
+
+    chunk_header = alloc_chunk(chunk_size);
+    if (!chunk_header) {
+        SLJIT_ALLOCATOR_UNLOCK();
+        return NULL;
+    }
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+    executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+    chunk_size -= CHUNK_EXTRA_SIZE;
+    total_size += chunk_size;
+
+    header = (struct block_header*)(((sljit_u8*)chunk_header) + CHUNK_HEADER_SIZE);
+
+    header->prev_size = 0; /* marks the first block of the chunk */
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+    header->executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+    if (chunk_size > size + 64) {
+        /* Cut the allocated space into a free and a used block. */
+        allocated_size += size;
+        header->size = size;
+        chunk_size -= size;
+
+        free_block = AS_FREE_BLOCK(header, size);
+        free_block->header.prev_size = size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+        free_block->header.executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+        sljit_insert_free_block(free_block, chunk_size);
+        next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+    }
+    else {
+        /* All space belongs to this allocation. */
+        allocated_size += chunk_size;
+        header->size = chunk_size;
+        next_header = AS_BLOCK_HEADER(header, chunk_size);
+    }
+    next_header->size = 1; /* block terminator */
+    next_header->prev_size = chunk_size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+    next_header->executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+    SLJIT_ALLOCATOR_UNLOCK(); /* released only after the terminator is written: the new free block is already reachable by other threads */
+    return MEM_START(header);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+{ /* Releases a block obtained from sljit_malloc_exec(), merges it with free neighbours and may return the whole chunk through free_chunk(). */
+ struct block_header *header;
+ struct free_block* free_block;
+
+ SLJIT_ALLOCATOR_LOCK();
+ header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); /* the header sits directly in front of ptr */
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+ header = AS_BLOCK_HEADER(header, -header->executable_offset); /* step back by the stored executable_offset */
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+ allocated_size -= header->size;
+
+ SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+
+ /* Connecting free blocks together if possible. */
+
+ /* If header->prev_size == 0, free_block will equal to header.
+ In this case, free_block->header.size will be > 0. */
+ free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+ if (SLJIT_UNLIKELY(!free_block->header.size)) { /* the previous block is free: grow it over this block */
+ free_block->size += header->size;
+ header = AS_BLOCK_HEADER(free_block, free_block->size);
+ header->prev_size = free_block->size;
+ }
+ else { /* otherwise this block itself becomes a new free list entry */
+ free_block = (struct free_block*)header;
+ sljit_insert_free_block(free_block, header->size);
+ }
+
+ header = AS_BLOCK_HEADER(free_block, free_block->size);
+ if (SLJIT_UNLIKELY(!header->size)) { /* the following block is free as well: absorb it */
+ free_block->size += ((struct free_block*)header)->size;
+ sljit_remove_free_block((struct free_block*)header);
+ header = AS_BLOCK_HEADER(free_block, free_block->size);
+ header->prev_size = free_block->size;
+ }
+
+ /* The whole chunk is free. */
+ if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+ /* If this block is freed, we still have (allocated_size / 2) free space. */
+ if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+ total_size -= free_block->size;
+ sljit_remove_free_block(free_block);
+ free_chunk(free_block, free_block->size + CHUNK_EXTRA_SIZE);
+ }
+ }
+
+ SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+ SLJIT_ALLOCATOR_UNLOCK();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{ /* Walks the free list and hands every chunk that consists of a single free block back through free_chunk(). */
+ struct free_block* free_block;
+ struct free_block* next_free_block;
+
+ SLJIT_ALLOCATOR_LOCK();
+ SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+
+ free_block = free_blocks;
+ while (free_block) {
+ next_free_block = free_block->next; /* read the link first: free_block may be released below */
+ if (!free_block->header.prev_size &&
+ AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { /* first block of its chunk and directly followed by the terminator */
+ total_size -= free_block->size;
+ sljit_remove_free_block(free_block);
+ free_chunk(free_block, free_block->size + CHUNK_EXTRA_SIZE);
+ }
+ free_block = next_free_block;
+ }
+
+ SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+ SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+ SLJIT_ALLOCATOR_UNLOCK();
+}
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
+{ /* Returns the executable_offset stored in the block header immediately preceding ptr. */
+ return ((struct block_header *)(ptr))[-1].executable_offset;
+}
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c
new file mode 100644
index 0000000000..3b93a4df76
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c
@@ -0,0 +1,89 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/mman.h>
+#include <sys/procctl.h>
+
+#ifdef PROC_WXMAP_CTL
+static SLJIT_INLINE int sljit_is_wx_block(void)
+{ /* Issues the PROC_WXMAP_CTL request with PROC_WX_MAPPINGS_PERMIT on first use and caches the outcome: 0 if procctl() returned 0, 1 otherwise. */
+ static int wx_block = -1; /* negative until the first call */
+ if (wx_block < 0) {
+ int sljit_wx_enable = PROC_WX_MAPPINGS_PERMIT;
+ wx_block = !!procctl(P_PID, 0, PROC_WXMAP_CTL, &sljit_wx_enable);
+ }
+ return wx_block;
+}
+
+#define SLJIT_IS_WX_BLOCK sljit_is_wx_block()
+#else /* !PROC_WXMAP_CTL */
+#define SLJIT_IS_WX_BLOCK (1)
+#endif /* PROC_WXMAP_CTL */
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{ /* Maps size bytes of private read/write/execute memory; returns NULL when the mapping or the permission re-check fails. */
+    void *retval;
+    int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+    int flags = MAP_PRIVATE;
+    int fd = -1;
+    int retried = 0; /* a failed mmap() is repeated at most once */
+#ifdef PROT_MAX
+    prot |= PROT_MAX(prot);
+#endif
+
+#ifdef MAP_ANON
+    flags |= MAP_ANON;
+#else /* !MAP_ANON */
+    if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero()))
+        return NULL;
+
+    fd = dev_zero;
+#endif /* MAP_ANON */
+
+retry:
+    retval = mmap(NULL, size, prot, flags, fd, 0);
+    if (retval == MAP_FAILED) {
+        /* Retry only once: SLJIT_IS_WX_BLOCK is cached, so a persistent failure such as ENOMEM would otherwise loop forever. */
+        if (retried++ || SLJIT_IS_WX_BLOCK)
+            return NULL;
+        goto retry;
+    }
+
+    /* HardenedBSD's mmap lies, so check permissions again. */
+    if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
+        munmap(retval, size);
+        return NULL;
+    }
+
+    return retval;
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Unmaps the size bytes starting at chunk; the munmap() result is not checked. */
+ munmap(chunk, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c
new file mode 100644
index 0000000000..a775f5629a
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c
@@ -0,0 +1,62 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{ /* Maps size bytes of private read/write/execute memory; returns NULL on failure. */
+ void *retval;
+ int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+ int flags = MAP_PRIVATE;
+ int fd = -1;
+
+#ifdef PROT_MAX
+ prot |= PROT_MAX(prot);
+#endif
+
+#ifdef MAP_ANON
+ flags |= MAP_ANON;
+#else /* !MAP_ANON */
+ if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) /* dev_zero and open_dev_zero() are defined outside this file */
+ return NULL;
+
+ fd = dev_zero; /* without MAP_ANON the mapping is backed by this descriptor */
+#endif /* MAP_ANON */
+
+ retval = mmap(NULL, size, prot, flags, fd, 0);
+ if (retval == MAP_FAILED) /* translate mmap()'s failure value into NULL for the caller */
+ return NULL;
+
+ return retval;
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Unmaps the size bytes starting at chunk; the munmap() result is not checked. */
+ munmap(chunk, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c
new file mode 100644
index 0000000000..f152a5a2cd
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c
@@ -0,0 +1,40 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{ /* Reserves and commits size bytes with PAGE_EXECUTE_READWRITE protection; VirtualAlloc()'s result is returned unchanged. */
+ return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Releases the region starting at chunk with MEM_RELEASE. */
+ SLJIT_UNUSED_ARG(size); /* VirtualFree() is called with a size of 0, so the parameter is not needed */
+ VirtualFree(chunk, 0, MEM_RELEASE);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c
new file mode 100644
index 0000000000..0b7fd57787
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c
@@ -0,0 +1,72 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define SLJIT_HAS_CHUNK_HEADER
+#define SLJIT_HAS_EXECUTABLE_OFFSET
+
+struct sljit_chunk_header {
+ void *executable; /* address of the executable duplicate of the chunk; set by alloc_chunk() */
+};
+
+/*
+ * MAP_REMAPDUP is a NetBSD extension available since 8.0, make sure to
+ * adjust your feature macros (ex: -D_NETBSD_SOURCE) as needed
+ */
+static SLJIT_INLINE struct sljit_chunk_header* alloc_chunk(sljit_uw size)
+{ /* Creates a read/write mapping plus a MAP_REMAPDUP duplicate that is switched to read/execute; returns the writable mapping, or NULL if any step fails. */
+ struct sljit_chunk_header *retval;
+
+ retval = (struct sljit_chunk_header *)mmap(NULL, size,
+ PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC),
+ MAP_ANON | MAP_SHARED, -1, 0);
+
+ if (retval == MAP_FAILED)
+ return NULL;
+
+ retval->executable = mremap(retval, size, NULL, size, MAP_REMAPDUP); /* the duplicate's address is stored at the start of the writable mapping */
+ if (retval->executable == MAP_FAILED) {
+ munmap((void *)retval, size);
+ return NULL;
+ }
+
+ if (mprotect(retval->executable, size, PROT_READ | PROT_EXEC) == -1) {
+ munmap(retval->executable, size); /* undo both mappings before reporting failure */
+ munmap((void *)retval, size);
+ return NULL;
+ }
+
+ return retval;
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Unmaps both mappings of a chunk; the munmap() results are not checked. */
+ struct sljit_chunk_header *header = ((struct sljit_chunk_header *)chunk) - 1; /* chunk points just past the sljit_chunk_header */
+
+ munmap(header->executable, size); /* read the executable address before the mapping that holds it goes away */
+ munmap((void *)header, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c
new file mode 100644
index 0000000000..f7cb6c5670
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c
@@ -0,0 +1,172 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define SLJIT_HAS_CHUNK_HEADER
+#define SLJIT_HAS_EXECUTABLE_OFFSET
+
+struct sljit_chunk_header {
+ void *executable; /* address of the read/execute mapping of the chunk; set by alloc_chunk() */
+};
+
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifndef O_NOATIME
+#define O_NOATIME 0
+#endif
+
+/* this is a linux extension available since kernel 3.11 */
+#ifndef O_TMPFILE
+#define O_TMPFILE 0x404000
+#endif
+
+#ifndef _GNU_SOURCE
+char *secure_getenv(const char *name);
+int mkostemp(char *template, int flags);
+#endif
+
+static SLJIT_INLINE int create_tempfile(void)
+{ /* Returns a descriptor for a nameless file (memfd, O_TMPFILE or an unlinked mkostemp() file), or -1 on failure. */
+    int fd;
+    char tmp_name[256];
+    size_t tmp_name_len = 0;
+    char *dir;
+    struct stat st;
+#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED
+    mode_t mode;
+#endif
+
+#ifdef HAVE_MEMFD_CREATE
+    /* this is a GNU extension, make sure to use -D_GNU_SOURCE */
+    fd = memfd_create("sljit", MFD_CLOEXEC);
+    if (fd != -1) {
+        fchmod(fd, 0);
+        return fd;
+    }
+#endif
+
+    dir = secure_getenv("TMPDIR");
+
+    if (dir) { /* TMPDIR is used only if it fits the buffer and names an existing directory */
+        size_t len = strlen(dir);
+        if (len > 0 && len < sizeof(tmp_name)) {
+            if ((stat(dir, &st) == 0) && S_ISDIR(st.st_mode)) {
+                memcpy(tmp_name, dir, len + 1);
+                tmp_name_len = len;
+            }
+        }
+    }
+
+#ifdef P_tmpdir
+    if (!tmp_name_len && strlen(P_tmpdir) < sizeof(tmp_name)) {
+        /* An oversized P_tmpdir is skipped so that "/tmp" is used instead. */
+        strcpy(tmp_name, P_tmpdir);
+        tmp_name_len = strlen(P_tmpdir);
+    }
+#endif
+    if (!tmp_name_len) {
+        strcpy(tmp_name, "/tmp");
+        tmp_name_len = 4;
+    }
+
+    SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name));
+
+    if (tmp_name_len > 1 && tmp_name[tmp_name_len - 1] == '/')
+        tmp_name[--tmp_name_len] = '\0';
+
+    fd = open(tmp_name, O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, 0);
+    if (fd != -1)
+        return fd;
+
+    if (tmp_name_len >= sizeof(tmp_name) - 7) /* no room left for "/XXXXXX" and the terminating NUL */
+        return -1;
+
+    strcpy(tmp_name + tmp_name_len, "/XXXXXX");
+#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED
+    mode = umask(0777);
+#endif
+    fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME);
+#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED
+    umask(mode);
+#else
+    if (fd != -1) /* nothing to chmod when mkostemp() failed */
+        fchmod(fd, 0);
+#endif
+    if (fd == -1)
+        return -1;
+
+    if (unlink(tmp_name)) { /* the file must not stay reachable by name */
+        close(fd);
+        return -1;
+    }
+
+    return fd;
+}
+
+static SLJIT_INLINE struct sljit_chunk_header* alloc_chunk(sljit_uw size)
+{ /* Maps one temporary file twice, read/write and read/execute; returns the writable mapping, or NULL if any step fails. */
+ struct sljit_chunk_header *retval;
+ int fd;
+
+ fd = create_tempfile();
+ if (fd == -1)
+ return NULL;
+
+ if (ftruncate(fd, (off_t)size)) { /* give the file the size of the chunk */
+ close(fd);
+ return NULL;
+ }
+
+ retval = (struct sljit_chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if (retval == MAP_FAILED) {
+ close(fd);
+ return NULL;
+ }
+
+ retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); /* second view of the same file, recorded in the chunk header */
+
+ if (retval->executable == MAP_FAILED) {
+ munmap((void *)retval, size);
+ close(fd);
+ return NULL;
+ }
+
+ close(fd); /* the descriptor is closed once both mappings exist */
+ return retval;
+}
+
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{ /* Unmaps both mappings of a chunk; the munmap() results are not checked. */
+ struct sljit_chunk_header *header = ((struct sljit_chunk_header *)chunk) - 1; /* chunk points just past the sljit_chunk_header */
+
+ munmap(header->executable, size); /* read the executable address before the mapping that holds it goes away */
+ munmap((void *)header, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c
new file mode 100644
index 0000000000..36d301434a
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c
@@ -0,0 +1,141 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ This file contains a simple W^X executable memory allocator
+
+ In *NIX, MAP_ANON is required (that is considered a feature) so make
+ sure to set the right availability macros for your system or the code
+ will fail to build.
+
+ If your system doesn't support mapping of anonymous pages (ex: IRIX) it
+ is also likely that it doesn't need this allocator and should be using
+ the standard one instead.
+
+ It allocates a separate map for each code block and may waste a lot of
+ memory, because whatever was requested, will be rounded up to the page
+ size (minimum 4KB, but could be even bigger).
+
+ It changes the page permissions (RW <-> RX) as needed. Therefore, if you
+ will be updating the code after it has been generated, you need to make
+ sure to block any concurrent execution; otherwise it could result in a
+ SIGBUS, which could even manifest itself at a different address than the
+ one that was being modified.
+
+ Only use if you are unable to use the regular allocator because of security
+ restrictions and adding exceptions to your application or the system are
+ not possible.
+*/
+
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
+ sljit_update_wx_flags((from), (to), (enable_exec))
+
+#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+#include <pthread.h>
+#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock)
+#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock)
+#else
+#define SLJIT_SE_LOCK()
+#define SLJIT_SE_UNLOCK()
+#endif /* !SLJIT_SINGLE_THREADED */
+
+#define SLJIT_WX_IS_BLOCK(ptr, size) generic_check_is_wx_block(ptr, size)
+
+static SLJIT_INLINE int generic_check_is_wx_block(void *ptr, sljit_uw size)
+{ /* Probes whether [ptr, ptr + size) can be made executable; returns 0 if it can, non-zero if switching is blocked. */
+ if (SLJIT_LIKELY(!mprotect(ptr, size, PROT_EXEC))) /* mprotect() returns 0 on success */
+ return !!mprotect(ptr, size, PROT_READ | PROT_WRITE); /* restore read/write; failing to restore also counts as blocked */
+
+ return 1; /* the region could not be made executable */
+}
+
+/*
+  Allocates a block for generated code as its own private anonymous mapping.
+
+  The mapping is created read/write; SLJIT_UPDATE_WX_FLAGS switches it
+  between read/write and read/exec afterwards. One sljit_uw is added in
+  front of the block to remember the total mapping length, which
+  sljit_free_exec() reads back when unmapping.
+
+  size: number of bytes requested by the caller (without the header).
+
+  Returns a pointer just past the hidden header, or NULL when
+    - an earlier probe found that RW <-> RX switching is blocked,
+    - size is so large that adding the header would wrap around,
+    - mmap() fails, or
+    - the probe performed on the first successful mapping fails.
+*/
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{
+#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+	static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif
+	/* -1: not probed yet, 0: RW <-> RX switching works, 1: it is blocked. */
+	static int wx_block = -1;
+	int prot = PROT_READ | PROT_WRITE;
+	sljit_uw* ptr;
+
+	if (SLJIT_UNLIKELY(wx_block > 0))
+		return NULL;
+
+	/* Without this check a huge request would wrap around below and be
+	   satisfied by a mapping far smaller than what the caller asked for. */
+	if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
+		return NULL;
+
+#ifdef PROT_MAX
+	/* Where PROT_MAX exists, declare up front that exec may be enabled later. */
+	prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
+
+	size += sizeof(sljit_uw);
+	ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0);
+
+	if (ptr == MAP_FAILED)
+		return NULL;
+
+	if (SLJIT_UNLIKELY(wx_block < 0)) {
+		/* First successful mapping: use it to probe whether the pages
+		   can be switched to executable and back. */
+		SLJIT_SE_LOCK();
+		wx_block = SLJIT_WX_IS_BLOCK(ptr, size);
+		SLJIT_SE_UNLOCK();
+		if (SLJIT_UNLIKELY(wx_block)) {
+			munmap((void *)ptr, size);
+			return NULL;
+		}
+	}
+
+	/* Store the full mapping length in the header and hand out the rest. */
+	*ptr++ = size;
+	return ptr;
+}
+
+#undef SLJIT_SE_UNLOCK
+#undef SLJIT_SE_LOCK
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+{ /* Unmaps a block whose length is stored in the sljit_uw directly in front of `ptr` (the layout sljit_malloc_exec() produces). */
+ sljit_uw *start_ptr = ((sljit_uw*)ptr) - 1; /* step back to the hidden size header, i.e. the start of the mapping */
+ munmap((void*)start_ptr, *start_ptr); /* the header holds the full mapping length, header included */
+}
+
+static void sljit_update_wx_flags(void *from, void *to, int enable_exec)
+{ /* Re-protects the pages spanning from..to as read/exec (enable_exec non-zero) or read/write (enable_exec zero). */
+ sljit_uw page_mask = (sljit_uw)get_page_alignment(); /* used below as a low-bit mask; presumably page size - 1 (helper is defined elsewhere) */
+ sljit_uw start = (sljit_uw)from;
+ sljit_uw end = (sljit_uw)to;
+ int prot = PROT_READ | (enable_exec ? PROT_EXEC : PROT_WRITE); /* never writable and executable at the same time */
+
+ SLJIT_ASSERT(start < end);
+
+ start &= ~page_mask; /* clear the low bits: move start down to an aligned address */
+ end = (end + page_mask) & ~page_mask; /* move end up to the next aligned address */
+
+ mprotect((void*)start, end - start, prot); /* the result is not checked */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+ /* Intentionally a no-op: sljit_free_exec() unmaps every block immediately, so this allocator does not keep unused memory for future allocations. */
+}
diff --git a/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c
new file mode 100644
index 0000000000..a9553bd7da
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c
@@ -0,0 +1,102 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ This file contains a simple W^X executable memory allocator
+
+ In *NIX, MAP_ANON is required (that is considered a feature) so make
+ sure to set the right availability macros for your system or the code
+ will fail to build.
+
+ If your system doesn't support mapping of anonymous pages (ex: IRIX) it
+ is also likely that it doesn't need this allocator and should be using
+ the standard one instead.
+
+ It allocates a separate map for each code block and may waste a lot of
+ memory, because whatever was requested, will be rounded up to the page
+ size (minimum 4KB, but could be even bigger).
+
+ It changes the page permissions (RW <-> RX) as needed. Therefore, if you
+ will be updating the code after it has been generated, you need to make
+ sure to block any concurrent execution; otherwise it could result in a
+ SIGBUS, which could even manifest itself at a different address than the
+ one that was being modified.
+
+ Only use if you are unable to use the regular allocator because of security
+ restrictions and adding exceptions to your application or the system are
+ not possible.
+*/
+
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
+ sljit_update_wx_flags((from), (to), (enable_exec))
+
+/*
+  Allocates a block for generated code with VirtualAlloc() as a committed,
+  read/write region; SLJIT_UPDATE_WX_FLAGS switches it between read/write
+  and executable afterwards. One sljit_uw holding the total size is stored
+  in front of the block.
+
+  size: number of bytes requested by the caller (without the header).
+
+  Returns a pointer just past the hidden header, or NULL when VirtualAlloc()
+  fails or size is so large that adding the header would wrap around.
+*/
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{
+	sljit_uw *ptr;
+
+	/* Without this check a huge request would wrap around below and be
+	   satisfied by a region far smaller than what the caller asked for. */
+	if (size > ~(sljit_uw)0 - sizeof(sljit_uw))
+		return NULL;
+
+	size += sizeof(sljit_uw);
+	ptr = (sljit_uw*)VirtualAlloc(NULL, size,
+				MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+
+	if (!ptr)
+		return NULL;
+
+	/* Store the total size in the header and hand out the rest. */
+	*ptr++ = size;
+
+	return ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+{ /* Releases the region that starts one sljit_uw before `ptr` (the layout sljit_malloc_exec() produces). */
+ sljit_uw start = (sljit_uw)ptr - sizeof(sljit_uw); /* step back over the hidden size header */
+#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
+ sljit_uw page_mask = (sljit_uw)get_page_alignment(); /* used as a low-bit mask; presumably page size - 1 (helper is defined elsewhere) */
+
+ SLJIT_ASSERT(!(start & page_mask)); /* debug builds only: the computed start must have no low bits set */
+#endif
+ VirtualFree((void*)start, 0, MEM_RELEASE); /* size 0 with MEM_RELEASE frees the whole allocation; the stored size is not needed */
+}
+
+/*
+  Re-protects the pages spanning from..to.
+
+  from, to:    address range of the code being switched; from must be lower
+               than to (asserted). The range is widened to whole pages.
+  enable_exec: non-zero makes the pages executable and readable, zero makes
+               them readable and writable. They are never writable and
+               executable at the same time.
+
+  The result of VirtualProtect() is not checked.
+*/
+static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec)
+{
+	DWORD oldprot;
+	/* Used below as a low-bit mask; presumably page size - 1 (the helper
+	   is defined elsewhere). */
+	sljit_uw page_mask = (sljit_uw)get_page_alignment();
+	sljit_uw start = (sljit_uw)from;
+	sljit_uw end = (sljit_uw)to;
+	/* Keep executable pages readable as well, matching the POSIX variant
+	   of this allocator (PROT_READ | PROT_EXEC); execute-only pages would
+	   make any data stored next to the code unreadable. */
+	DWORD prot = enable_exec ? PAGE_EXECUTE_READ : PAGE_READWRITE;
+
+	SLJIT_ASSERT(start < end);
+
+	/* Widen the range to aligned boundaries on both sides. */
+	start &= ~page_mask;
+	end = (end + page_mask) & ~page_mask;
+
+	VirtualProtect((void*)start, end - start, prot, &oldprot);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+ /* Intentionally a no-op: sljit_free_exec() releases every block with VirtualFree(), so this allocator does not keep unused memory for future allocations. */
+}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfig.h b/src/3rdparty/pcre2/src/sljit/sljitConfig.h
index 5fba7aa638..364c8bb788 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfig.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfig.h
@@ -39,28 +39,6 @@ extern "C" {
*/
/* --------------------------------------------------------------------- */
-/* Architecture */
-/* --------------------------------------------------------------------- */
-
-/* Architecture selection. */
-/* #define SLJIT_CONFIG_X86_32 1 */
-/* #define SLJIT_CONFIG_X86_64 1 */
-/* #define SLJIT_CONFIG_ARM_V5 1 */
-/* #define SLJIT_CONFIG_ARM_V7 1 */
-/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
-/* #define SLJIT_CONFIG_ARM_64 1 */
-/* #define SLJIT_CONFIG_PPC_32 1 */
-/* #define SLJIT_CONFIG_PPC_64 1 */
-/* #define SLJIT_CONFIG_MIPS_32 1 */
-/* #define SLJIT_CONFIG_MIPS_64 1 */
-/* #define SLJIT_CONFIG_RISCV_32 1 */
-/* #define SLJIT_CONFIG_RISCV_64 1 */
-/* #define SLJIT_CONFIG_S390X 1 */
-
-/* #define SLJIT_CONFIG_AUTO 1 */
-/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
-
-/* --------------------------------------------------------------------- */
/* Utilities */
/* --------------------------------------------------------------------- */
@@ -96,7 +74,9 @@ extern "C" {
/* Executable code allocation:
If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
- define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
+ define SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC.
+ Optionally, depending on the implementation used for the allocator,
+ SLJIT_EXEC_OFFSET and SLJIT_UPDATE_WX_FLAGS might also be needed. */
#ifndef SLJIT_EXECUTABLE_ALLOCATOR
/* Enabled by default. */
#define SLJIT_EXECUTABLE_ALLOCATOR 1
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigCPU.h b/src/3rdparty/pcre2/src/sljit/sljitConfigCPU.h
new file mode 100644
index 0000000000..2720bdab0b
--- /dev/null
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfigCPU.h
@@ -0,0 +1,188 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SLJIT_CONFIG_CPU_H_
+#define SLJIT_CONFIG_CPU_H_
+
+/* --------------------------------------------------------------------- */
+/* Architecture */
+/* --------------------------------------------------------------------- */
+
+/* Architecture selection. */
+/* #define SLJIT_CONFIG_X86_32 1 */
+/* #define SLJIT_CONFIG_X86_64 1 */
+/* #define SLJIT_CONFIG_ARM_V6 1 */
+/* #define SLJIT_CONFIG_ARM_V7 1 */
+/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
+/* #define SLJIT_CONFIG_ARM_64 1 */
+/* #define SLJIT_CONFIG_PPC_32 1 */
+/* #define SLJIT_CONFIG_PPC_64 1 */
+/* #define SLJIT_CONFIG_MIPS_32 1 */
+/* #define SLJIT_CONFIG_MIPS_64 1 */
+/* #define SLJIT_CONFIG_RISCV_32 1 */
+/* #define SLJIT_CONFIG_RISCV_64 1 */
+/* #define SLJIT_CONFIG_S390X 1 */
+/* #define SLJIT_CONFIG_LOONGARCH_64 1 */
+
+/* #define SLJIT_CONFIG_AUTO 1 */
+/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
+
+/*****************/
+/* Sanity check. */
+/*****************/
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+ + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+ + (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \
+ + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+ + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+ + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+ + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+ + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+ + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
+ + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+ + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ + (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \
+ + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+ + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
+#error "Multiple architectures are selected"
+#endif /* each term above is 0 or 1, so the sum counts the enabled selectors; at most one is allowed */
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+ && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+ && !(defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \
+ && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+ && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+ && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+ && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+ && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
+ && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \
+ && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
+ && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
+#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO /* auto detection was explicitly set to 0 and nothing else is selected */
+#error "An architecture must be selected"
+#else /* SLJIT_CONFIG_AUTO */
+#define SLJIT_CONFIG_AUTO 1 /* nothing selected at all: fall back to automatic detection */
+#endif /* !SLJIT_CONFIG_AUTO */
+#endif /* !SLJIT_CONFIG */
+
+/********************************************************/
+/* Automatic CPU detection (requires compiler support). */
+/********************************************************/
+
+#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
+#ifndef _WIN32 /* non-Windows targets: the first matching test in the chain below wins */
+
+#if defined(__i386__) || defined(__i386)
+#define SLJIT_CONFIG_X86_32 1
+#elif defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif defined(__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
+#elif defined(__thumb2__) /* tested before the ARM architecture-level checks, so Thumb-2 takes precedence over ARM_V7 / ARM_V6 */
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
+ ((defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7S__)) \
+ || (defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8R__)) \
+ || (defined(__ARM_ARCH_9A__)))
+#define SLJIT_CONFIG_ARM_V7 1
+#elif defined(__arm__) || defined (__ARM__) /* any other ARM target not matched above */
+#define SLJIT_CONFIG_ARM_V6 1
+#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__))
+#define SLJIT_CONFIG_PPC_64 1
+#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
+#define SLJIT_CONFIG_PPC_32 1
+#elif defined(__mips__) && !defined(_LP64) /* __mips__ without _LP64 selects the 32 bit MIPS configuration */
+#define SLJIT_CONFIG_MIPS_32 1
+#elif defined(__mips64)
+#define SLJIT_CONFIG_MIPS_64 1
+#elif defined (__riscv_xlen) && (__riscv_xlen == 32)
+#define SLJIT_CONFIG_RISCV_32 1
+#elif defined (__riscv_xlen) && (__riscv_xlen == 64)
+#define SLJIT_CONFIG_RISCV_64 1
+#elif defined (__loongarch_lp64)
+#define SLJIT_CONFIG_LOONGARCH_64 1
+#elif defined(__s390x__)
+#define SLJIT_CONFIG_S390X 1
+#else
+/* Unsupported architecture */
+#define SLJIT_CONFIG_UNSUPPORTED 1
+#endif
+
+#else /* _WIN32 */
+
+#if defined(_M_X64) || defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif (defined(_M_ARM) && _M_ARM >= 7)
+#define SLJIT_CONFIG_ARM_V7 1
+#elif defined(_ARM_)
+#define SLJIT_CONFIG_ARM_V6 1
+#elif defined(_M_ARM64) || defined(__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
+#else
+#define SLJIT_CONFIG_X86_32 1 /* default for every Windows target not matched above; this branch never yields SLJIT_CONFIG_UNSUPPORTED */
+#endif
+
+#endif /* !_WIN32 */
+#endif /* SLJIT_CONFIG_AUTO */
+
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#undef SLJIT_EXECUTABLE_ALLOCATOR /* drop the executable allocator setting when the architecture is unsupported */
+#endif /* SLJIT_CONFIG_UNSUPPORTED */
+
+/******************************/
+/* CPU family type detection. */
+/******************************/
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+ || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#define SLJIT_CONFIG_ARM_32 1 /* common marker for the ARM_V6, ARM_V7 and ARM_THUMB2 configurations */
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_CONFIG_X86 1
+#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_CONFIG_ARM 1
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_CONFIG_PPC 1
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#define SLJIT_CONFIG_MIPS 1
+#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+#define SLJIT_CONFIG_RISCV 1
+#elif (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
+#define SLJIT_CONFIG_LOONGARCH 1
+#endif /* no family macro is defined in this chain for SLJIT_CONFIG_S390X or SLJIT_CONFIG_UNSUPPORTED */
+
+#endif /* SLJIT_CONFIG_CPU_H_ */
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
index cd3ce69734..ce4e7b04ec 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
@@ -61,6 +61,8 @@ extern "C" {
SLJIT_BIG_ENDIAN : big endian architecture
SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported
SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported
+ SLJIT_MASKED_SHIFT : all word shifts are always masked
+ SLJIT_MASKED_SHIFT32 : all 32 bit shifts are always masked
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
Constants:
@@ -70,6 +72,8 @@ extern "C" {
SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
+ SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers
+ SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
SLJIT_F32_SHIFT : the shift required to apply when accessing
a single precision floating point array by index
@@ -79,141 +83,27 @@ extern "C" {
the scratch register index of ecx is stored in this variable
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
+ SLJIT_CONV_MAX_FLOAT : result when a floating point value is converted to integer
+ and the floating point value is higher than the maximum integer value
+ (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
+ SLJIT_CONV_MIN_FLOAT : result when a floating point value is converted to integer
+ and the floating point value is lower than the minimum integer value
+ (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
+ SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer
+ (possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT,
+ or SLJIT_CONV_RESULT_ZERO)
Other macros:
+ SLJIT_TMP_R0 .. R9 : accessing temporary registers
+ SLJIT_TMP_R(i) : accessing temporary registers
+ SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers
+ SLJIT_TMP_FR(i) : accessing temporary floating point registers
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
+ SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
+ floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero
*/
-/*****************/
-/* Sanity check. */
-/*****************/
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
- + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
- + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
- + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
- + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
- + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
- + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
- + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
- + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
- + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
- + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
- + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
- + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
- + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
- + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
-#error "Multiple architectures are selected"
-#endif
-
-#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
- && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
- && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
- && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
- && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
- && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
- && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
- && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
- && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
- && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
- && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
- && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
- && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
- && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
- && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
-#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO
-#error "An architecture must be selected"
-#else /* SLJIT_CONFIG_AUTO */
-#define SLJIT_CONFIG_AUTO 1
-#endif /* !SLJIT_CONFIG_AUTO */
-#endif /* !SLJIT_CONFIG */
-
-/********************************************************/
-/* Automatic CPU detection (requires compiler support). */
-/********************************************************/
-
-#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
-
-#ifndef _WIN32
-
-#if defined(__i386__) || defined(__i386)
-#define SLJIT_CONFIG_X86_32 1
-#elif defined(__x86_64__)
-#define SLJIT_CONFIG_X86_64 1
-#elif defined(__arm__) || defined(__ARM__)
-#ifdef __thumb2__
-#define SLJIT_CONFIG_ARM_THUMB2 1
-#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__)
-#define SLJIT_CONFIG_ARM_V7 1
-#else
-#define SLJIT_CONFIG_ARM_V5 1
-#endif
-#elif defined (__aarch64__)
-#define SLJIT_CONFIG_ARM_64 1
-#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__))
-#define SLJIT_CONFIG_PPC_64 1
-#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
-#define SLJIT_CONFIG_PPC_32 1
-#elif defined(__mips__) && !defined(_LP64)
-#define SLJIT_CONFIG_MIPS_32 1
-#elif defined(__mips64)
-#define SLJIT_CONFIG_MIPS_64 1
-#elif defined (__riscv_xlen) && (__riscv_xlen == 32)
-#define SLJIT_CONFIG_RISCV_32 1
-#elif defined (__riscv_xlen) && (__riscv_xlen == 64)
-#define SLJIT_CONFIG_RISCV_64 1
-#elif defined(__s390x__)
-#define SLJIT_CONFIG_S390X 1
-#else
-/* Unsupported architecture */
-#define SLJIT_CONFIG_UNSUPPORTED 1
-#endif
-
-#else /* _WIN32 */
-
-#if defined(_M_X64) || defined(__x86_64__)
-#define SLJIT_CONFIG_X86_64 1
-#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
-#define SLJIT_CONFIG_ARM_THUMB2 1
-#elif (defined(_M_ARM) && _M_ARM >= 7)
-#define SLJIT_CONFIG_ARM_V7 1
-#elif defined(_ARM_)
-#define SLJIT_CONFIG_ARM_V5 1
-#elif defined(_M_ARM64) || defined(__aarch64__)
-#define SLJIT_CONFIG_ARM_64 1
-#else
-#define SLJIT_CONFIG_X86_32 1
-#endif
-
-#endif /* !_WIN32 */
-#endif /* SLJIT_CONFIG_AUTO */
-
-#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
-#undef SLJIT_EXECUTABLE_ALLOCATOR
-#endif
-
-/******************************/
-/* CPU family type detection. */
-/******************************/
-
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
- || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
-#define SLJIT_CONFIG_ARM_32 1
-#endif
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-#define SLJIT_CONFIG_X86 1
-#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
-#define SLJIT_CONFIG_ARM 1
-#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define SLJIT_CONFIG_PPC 1
-#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-#define SLJIT_CONFIG_MIPS 1
-#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-#define SLJIT_CONFIG_RISCV 1
-#endif
-
/***********************************************************/
/* Intel Control-flow Enforcement Technology (CET) spport. */
/***********************************************************/
@@ -328,6 +218,10 @@ extern "C" {
/* Instruction cache flush. */
/****************************/
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
/*
* TODO:
*
@@ -368,7 +262,7 @@ extern "C" {
/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)
-#elif defined __APPLE__
+#elif defined(__APPLE__) && MAC_OS_X_VERSION_MIN_REQUIRED >= 1050
/* Supported by all macs since Mac OS 10.5.
However, it does not work on non-jailbroken iOS devices,
@@ -433,14 +327,15 @@ typedef signed int sljit_s32;
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
/* Just to have something. */
#define SLJIT_WORD_SHIFT 0
-typedef unsigned long int sljit_uw;
-typedef long int sljit_sw;
+typedef unsigned int sljit_uw;
+typedef int sljit_sw;
#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
- && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
typedef unsigned int sljit_uw;
@@ -476,12 +371,42 @@ typedef double sljit_f64;
#define SLJIT_F32_SHIFT 2
#define SLJIT_F64_SHIFT 3
+#define SLJIT_CONV_RESULT_MAX_INT 0
+#define SLJIT_CONV_RESULT_MIN_INT 1
+#define SLJIT_CONV_RESULT_ZERO 2
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#elif (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#else
+#error "Result for float to integer conversion is not defined"
+#endif
+
#ifndef SLJIT_W
/* Defining long constants. */
-#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
-#define SLJIT_W(w) (w##l)
-#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#ifdef _WIN64
#define SLJIT_W(w) (w##ll)
#else /* !windows */
@@ -521,9 +446,10 @@ typedef double sljit_f64;
/* Auto detecting mips revision. */
#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
#define SLJIT_MIPS_REV 6
-#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \
- || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \
- || (defined __clang__ && defined _MIPS_ARCH_P5600)
+#elif defined(__mips_isa_rev) && __mips_isa_rev >= 1
+#define SLJIT_MIPS_REV __mips_isa_rev
+#elif defined(__clang__) \
+ && (defined(_MIPS_ARCH_OCTEON) || defined(_MIPS_ARCH_P5600))
/* clang either forgets to define (clang-7) __mips_isa_rev at all
* or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+)
* and -march=p5600 (MIPS32 R5).
@@ -562,7 +488,8 @@ typedef double sljit_f64;
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
- || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
#define SLJIT_UNALIGNED 1
#endif
@@ -574,7 +501,8 @@ typedef double sljit_f64;
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
- || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
#define SLJIT_FPU_UNALIGNED 1
#endif
@@ -594,6 +522,19 @@ typedef double sljit_f64;
#define SLJIT_FUNC
#endif /* !SLJIT_FUNC */
+/* Disable instrumentation for these functions as they may not be sound */
+#ifndef SLJIT_FUNC_ATTRIBUTE
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define SLJIT_FUNC_ATTRIBUTE __attribute__((no_sanitize("memory")))
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+#endif
+
+#ifndef SLJIT_FUNC_ATTRIBUTE
+#define SLJIT_FUNC_ATTRIBUTE
+#endif
+
#ifndef SLJIT_INDIRECT_CALL
#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \
|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
@@ -631,12 +572,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
-#else
-#define SLJIT_EXEC_OFFSET(ptr) 0
#endif
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+#ifndef SLJIT_EXEC_OFFSET
+#define SLJIT_EXEC_OFFSET(ptr) 0
+#endif
+
/**********************************************/
/* Registers and locals offset determination. */
/**********************************************/
@@ -645,15 +588,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 1
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
#define SLJIT_PREF_SHIFT_REG SLJIT_R2
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#define SLJIT_NUMBER_OF_REGISTERS 13
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#ifndef _WIN64
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
@@ -664,37 +613,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw))
#endif /* !_WIN64 */
#define SLJIT_PREF_SHIFT_REG SLJIT_R3
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
-#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
-
-#define SLJIT_NUMBER_OF_REGISTERS 12
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
-#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
-#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
-#define SLJIT_LOCALS_OFFSET_BASE 0
-
-#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
@@ -717,14 +668,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#endif
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
-#define SLJIT_LOCALS_OFFSET_BASE 0
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_LOCALS_OFFSET_BASE 0
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
@@ -751,16 +710,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
+#define SLJIT_MASKED_SHIFT 1
+
+#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
+
+#define SLJIT_NUMBER_OF_REGISTERS 23
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_LOCALS_OFFSET_BASE 0
+#define SLJIT_MASKED_SHIFT 1
+#define SLJIT_MASKED_SHIFT32 1
#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+/* Just to have something. */
#define SLJIT_NUMBER_OF_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
+#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
+#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE 0
#endif
@@ -773,6 +750,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
+/**********************************/
+/* Temporary register management. */
+/**********************************/
+
+#define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+
+/* WARNING: Accessing temporary registers is not recommended, because they
+ are also used by the JIT compiler for various computations. Using them
+ may cause arbitrary side effects, including incorrect operations and
+ crashes, so use them at your own risk. The machine registers themselves
+ might have limitations, e.g. the r0 register on s390x / ppc cannot be
+ used as a base address for memory operations. */
+
+/* Temporary registers */
+#define SLJIT_TMP_R0 (SLJIT_TMP_REGISTER_BASE + 0)
+#define SLJIT_TMP_R1 (SLJIT_TMP_REGISTER_BASE + 1)
+#define SLJIT_TMP_R2 (SLJIT_TMP_REGISTER_BASE + 2)
+#define SLJIT_TMP_R3 (SLJIT_TMP_REGISTER_BASE + 3)
+#define SLJIT_TMP_R4 (SLJIT_TMP_REGISTER_BASE + 4)
+#define SLJIT_TMP_R5 (SLJIT_TMP_REGISTER_BASE + 5)
+#define SLJIT_TMP_R6 (SLJIT_TMP_REGISTER_BASE + 6)
+#define SLJIT_TMP_R7 (SLJIT_TMP_REGISTER_BASE + 7)
+#define SLJIT_TMP_R8 (SLJIT_TMP_REGISTER_BASE + 8)
+#define SLJIT_TMP_R9 (SLJIT_TMP_REGISTER_BASE + 9)
+#define SLJIT_TMP_R(i) (SLJIT_TMP_REGISTER_BASE + (i))
+
+#define SLJIT_TMP_FR0 (SLJIT_TMP_FREGISTER_BASE + 0)
+#define SLJIT_TMP_FR1 (SLJIT_TMP_FREGISTER_BASE + 1)
+#define SLJIT_TMP_FR2 (SLJIT_TMP_FREGISTER_BASE + 2)
+#define SLJIT_TMP_FR3 (SLJIT_TMP_FREGISTER_BASE + 3)
+#define SLJIT_TMP_FR4 (SLJIT_TMP_FREGISTER_BASE + 4)
+#define SLJIT_TMP_FR5 (SLJIT_TMP_FREGISTER_BASE + 5)
+#define SLJIT_TMP_FR6 (SLJIT_TMP_FREGISTER_BASE + 6)
+#define SLJIT_TMP_FR7 (SLJIT_TMP_FREGISTER_BASE + 7)
+#define SLJIT_TMP_FR8 (SLJIT_TMP_FREGISTER_BASE + 8)
+#define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9)
+#define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i))
+
/********************************/
/* CPU status flags management. */
/********************************/
@@ -781,10 +797,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
- || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
#define SLJIT_HAS_STATUS_FLAGS_STATE 1
#endif
+/***************************************/
+/* Floating point register management. */
+/***************************************/
+
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_F64_SECOND(reg) \
+ ((reg) + SLJIT_FS0 + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)
+#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
+#define SLJIT_F64_SECOND(reg) \
+ (reg)
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
+
/*************************************/
/* Debug and verbose related macros. */
/*************************************/
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.c b/src/3rdparty/pcre2/src/sljit/sljitLir.c
index abafe1add9..6f19300081 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.c
@@ -93,7 +93,8 @@
#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type))
#define VARIABLE_FLAG_SHIFT (10)
-#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT)
+/* All variable flags are even. */
+#define VARIABLE_FLAG_MASK (0x3e << VARIABLE_FLAG_SHIFT)
#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT)
#define GET_OPCODE(op) \
@@ -122,25 +123,34 @@
#endif
/* Parameter parsing. */
-#define REG_MASK 0x3f
+#define REG_MASK 0x7f
#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK)
#define OFFS_REG_MASK (REG_MASK << 8)
#define TO_OFFS_REG(reg) ((reg) << 8)
-/* When reg cannot be unused. */
-#define FAST_IS_REG(reg) ((reg) <= REG_MASK)
+#define FAST_IS_REG(reg) ((reg) < REG_MASK)
/* Mask for argument types. */
#define SLJIT_ARG_MASK 0x7
#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG)
-/* Mask for sljit_emit_mem. */
-#define REG_PAIR_MASK 0xff00
-#define REG_PAIR_FIRST(reg) ((reg) & 0xff)
+/* Mask for register pairs. */
+#define REG_PAIR_MASK 0x7f00
+#define REG_PAIR_FIRST(reg) ((reg) & 0x7f)
#define REG_PAIR_SECOND(reg) ((reg) >> 8)
/* Mask for sljit_emit_enter. */
#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3)
+/* Getters for simd operations, which return log2(size). */
+#define SLJIT_SIMD_GET_OPCODE(type) ((type) & 0xff)
+#define SLJIT_SIMD_GET_REG_SIZE(type) (((type) >> 12) & 0x3f)
+#define SLJIT_SIMD_GET_ELEM_SIZE(type) (((type) >> 18) & 0x3f)
+#define SLJIT_SIMD_GET_ELEM2_SIZE(type) (((type) >> 24) & 0x3f)
+
+#define SLJIT_SIMD_CHECK_REG(type) (((type) & 0x3f000) >= SLJIT_SIMD_REG_64 && ((type) & 0x3f000) <= SLJIT_SIMD_REG_512)
+#define SLJIT_SIMD_TYPE_MASK(m) ((sljit_s32)0xff000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m)))
+#define SLJIT_SIMD_TYPE_MASK2(m) ((sljit_s32)0xc0000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m)))
+
/* Jump flags. */
#define JUMP_LABEL 0x1
#define JUMP_ADDR 0x2
@@ -155,14 +165,14 @@
# define TYPE_SHIFT 13
#endif /* SLJIT_CONFIG_X86 */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
# define IS_BL 0x4
# define PATCH_B 0x8
-#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */
+#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
# define CPOOL_SIZE 512
-#endif /* SLJIT_CONFIG_ARM_V5 */
+#endif /* SLJIT_CONFIG_ARM_V6 */
#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
# define IS_COND 0x04
@@ -248,15 +258,27 @@
#endif /* SLJIT_CONFIG_RISCV_64 */
#endif /* SLJIT_CONFIG_RISCV */
+#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
+# define IS_COND 0x004
+# define IS_CALL 0x008
+
+# define PATCH_B 0x010
+# define PATCH_J 0x020
+
+# define PATCH_REL32 0x040
+# define PATCH_ABS32 0x080
+# define PATCH_ABS52 0x100
+
+#endif /* SLJIT_CONFIG_LOONGARCH */
/* Stack management. */
#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \
(((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \
(saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw))
-#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, size) \
+#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, type) \
(((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \
- (fsaveds)) * (sljit_s32)(size))
+ (fsaveds)) * SSIZE_OF(type))
#define ADJUST_LOCAL_OFFSET(p, i) \
if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
@@ -272,25 +294,49 @@
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-#include "sljitProtExecAllocator.c"
-#elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR)
-#include "sljitWXExecAllocator.c"
+
+#if defined(__NetBSD__)
+#include "allocator_src/sljitProtExecAllocatorNetBSD.c"
#else
-#include "sljitExecAllocator.c"
+#include "allocator_src/sljitProtExecAllocatorPosix.c"
#endif
+#elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR)
+
+#if defined(_WIN32)
+#include "allocator_src/sljitWXExecAllocatorWindows.c"
+#else
+#include "allocator_src/sljitWXExecAllocatorPosix.c"
#endif
-#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset))
#else
-#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr))
+
+#if defined(_WIN32)
+#include "allocator_src/sljitExecAllocatorWindows.c"
+#elif defined(__APPLE__)
+#include "allocator_src/sljitExecAllocatorApple.c"
+#elif defined(__FreeBSD__)
+#include "allocator_src/sljitExecAllocatorFreeBSD.c"
+#else
+#include "allocator_src/sljitExecAllocatorPosix.c"
+#endif
+
#endif
+#else /* !SLJIT_EXECUTABLE_ALLOCATOR */
+
#ifndef SLJIT_UPDATE_WX_FLAGS
#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
#endif
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset))
+#else
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr))
+#endif
+
/* Argument checking features. */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -422,9 +468,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
compiler->args_size = -1;
-#endif
+#endif /* SLJIT_CONFIG_X86_32 */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw)
+ CPOOL_SIZE * sizeof(sljit_u8), allocator_data);
if (!compiler->cpool) {
@@ -435,18 +481,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo
}
compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE);
compiler->cpool_diff = 0xffffffff;
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
compiler->delay_slot = UNMOVABLE_INS;
-#endif
+#endif /* SLJIT_CONFIG_MIPS */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
compiler->last_flags = 0;
compiler->last_return = -1;
compiler->logical_local_size = 0;
-#endif
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT)
if (!compiler_initialized) {
@@ -479,7 +525,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile
SLJIT_FREE(curr, allocator_data);
}
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
SLJIT_FREE(compiler->cpool, allocator_data);
#endif
SLJIT_FREE(compiler, allocator_data);
@@ -802,11 +848,8 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch
#define FUNCTION_CHECK_IS_REG(r) \
(((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \
- || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
-
-#define FUNCTION_CHECK_IS_FREG(fr) \
- (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \
- || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0))
+ || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0) \
+ || ((r) >= SLJIT_TMP_REGISTER_BASE && (r) < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS)))
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8)
@@ -816,7 +859,7 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch
static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (!(p & SLJIT_MEM))
@@ -853,7 +896,7 @@ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s
static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (FUNCTION_CHECK_IS_REG(p))
@@ -870,7 +913,7 @@ static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p
static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (FUNCTION_CHECK_IS_REG(p))
@@ -882,19 +925,59 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p
#define FUNCTION_CHECK_DST(p, i) \
CHECK_ARGUMENT(function_check_dst(compiler, p, i));
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+
+#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
+ function_check_is_freg(compiler, (fr), (is_32))
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32);
+
+#define FUNCTION_FCHECK(p, i, is_32) \
+ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32)));
+
+static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if (FUNCTION_CHECK_IS_FREG(p, is_32))
+ return (i == 0);
+
+ return function_check_src_mem(compiler, p, i);
+}
+
+#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
+#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
+ function_check_is_freg(compiler, (fr))
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
+ || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
+}
+
+#define FUNCTION_FCHECK(p, i, is_32) \
+ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)));
+
static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
- if (FUNCTION_CHECK_IS_FREG(p))
+ if ((p >= SLJIT_FR0 && p < (SLJIT_FR0 + compiler->fscratches))
+ || (p > (SLJIT_FS0 - compiler->fsaveds) && p <= SLJIT_FS0)
+ || (p >= SLJIT_TMP_FREGISTER_BASE && p < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)))
return (i == 0);
return function_check_src_mem(compiler, p, i);
}
-#define FUNCTION_FCHECK(p, i) \
- CHECK_ARGUMENT(function_fcheck(compiler, p, i));
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
#endif /* SLJIT_ARGUMENT_CHECKS */
@@ -923,23 +1006,35 @@ static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r)
{
if (r < (SLJIT_R0 + compiler->scratches))
fprintf(compiler->verbose, "r%d", r - SLJIT_R0);
- else if (r != SLJIT_SP)
+ else if (r < SLJIT_SP)
fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - r);
- else
+ else if (r == SLJIT_SP)
fprintf(compiler->verbose, "sp");
+ else
+ fprintf(compiler->verbose, "t%d", r - SLJIT_TMP_REGISTER_BASE);
}
static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r)
{
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+ if (r >= SLJIT_F64_SECOND(SLJIT_FR0)) {
+ fprintf(compiler->verbose, "^");
+ r -= SLJIT_F64_SECOND(0);
+ }
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
+
if (r < (SLJIT_FR0 + compiler->fscratches))
fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0);
- else
+ else if (r < SLJIT_TMP_FREGISTER_BASE)
fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r);
+ else
+ fprintf(compiler->verbose, "ft%d", r - SLJIT_TMP_FREGISTER_BASE);
}
static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if ((p) & SLJIT_IMM)
+ if ((p) == SLJIT_IMM)
fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i));
else if ((p) & SLJIT_MEM) {
if ((p) & REG_MASK) {
@@ -991,9 +1086,17 @@ static const char* op0_names[] = {
};
static const char* op1_names[] = {
+ "mov", "mov", "mov", "mov",
+ "mov", "mov", "mov", "mov",
+ "mov", "clz", "ctz", "rev",
+ "rev", "rev", "rev", "rev"
+};
+
+static const char* op1_types[] = {
"", ".u8", ".s8", ".u16",
".s16", ".u32", ".s32", "32",
- ".p", "not", "clz", "ctz"
+ ".p", "", "", "",
+ ".u16", ".s16", ".u32", ".s32"
};
static const char* op2_names[] = {
@@ -1003,22 +1106,36 @@ static const char* op2_names[] = {
"ashr", "mashr", "rotl", "rotr"
};
-static const char* op_src_names[] = {
+static const char* op_src_dst_names[] = {
"fast_return", "skip_frames_before_fast_return",
"prefetch_l1", "prefetch_l2",
"prefetch_l3", "prefetch_once",
+ "fast_enter", "get_return_address"
};
static const char* fop1_names[] = {
"mov", "conv", "conv", "conv",
- "conv", "conv", "cmp", "neg",
- "abs",
+ "conv", "conv", "conv", "conv",
+ "cmp", "neg", "abs",
+};
+
+static const char* fop1_conv_types[] = {
+ "sw", "s32", "sw", "s32",
+ "uw", "u32"
};
static const char* fop2_names[] = {
"add", "sub", "mul", "div"
};
+static const char* fop2r_names[] = {
+ "copysign"
+};
+
+static const char* simd_op2_names[] = {
+ "and", "or", "xor"
+};
+
static const char* jump_names[] = {
"equal", "not_equal",
"less", "greater_equal",
@@ -1026,7 +1143,8 @@ static const char* jump_names[] = {
"sig_less", "sig_greater_equal",
"sig_greater", "sig_less_equal",
"overflow", "not_overflow",
- "carry", "",
+ "carry", "not_carry",
+ "atomic_stored", "atomic_not_stored",
"f_equal", "f_not_equal",
"f_less", "f_greater_equal",
"f_greater", "f_less_equal",
@@ -1126,7 +1244,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options));
}
- fprintf(compiler->verbose, "scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
+ fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
scratches, saveds, fscratches, fsaveds, local_size);
}
#endif
@@ -1198,7 +1316,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID);
+ CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_RET_VOID);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1241,7 +1359,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi
if (GET_OPCODE(op) < SLJIT_MOV_F64) {
FUNCTION_CHECK_SRC(src, srcw);
} else {
- FUNCTION_FCHECK(src, srcw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
}
compiler->last_flags = 0;
#endif
@@ -1249,7 +1367,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
if (GET_OPCODE(op) < SLJIT_MOV_F64) {
fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? "" : "32",
- op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]);
+ op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]);
sljit_verbose_param(compiler, src, srcw);
} else {
fprintf(compiler->verbose, " return%s ", !(op & SLJIT_32) ? ".f64" : ".f32");
@@ -1277,22 +1395,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_to(struct sljit_co
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
-{
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
- compiler->last_flags = 0;
-#endif
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
- if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " fast_enter ");
- sljit_verbose_param(compiler, dst, dstw);
- fprintf(compiler->verbose, "\n");
- }
-#endif
- CHECK_RETURN_OK;
-}
-
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -1326,16 +1428,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CTZ);
+ CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32);
switch (GET_OPCODE(op)) {
- case SLJIT_NOT:
- /* Only SLJIT_32 and SLJIT_SET_Z are allowed. */
- CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
- break;
case SLJIT_MOV:
case SLJIT_MOV_U32:
+ case SLJIT_MOV_S32:
+ case SLJIT_MOV32:
case SLJIT_MOV_P:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
/* Nothing allowed */
CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
break;
@@ -1347,25 +1449,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
FUNCTION_CHECK_DST(dst, dstw);
FUNCTION_CHECK_SRC(src, srcw);
-
- if (GET_OPCODE(op) >= SLJIT_NOT) {
- CHECK_ARGUMENT(src != SLJIT_IMM);
- compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z));
- }
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- if (GET_OPCODE(op) <= SLJIT_MOV_P)
- {
- fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_32) ? "" : "32",
- op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]);
- }
- else
- {
- fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_32) ? "" : "32",
- !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".",
- !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]);
- }
+ fprintf(compiler->verbose, " %s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE],
+ !(op & SLJIT_32) ? "" : "32", op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]);
sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, ", ");
@@ -1376,6 +1464,94 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC));
+ CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P);
+ CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32);
+
+ /* All arguments must be valid registers. */
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg));
+
+ if (op == SLJIT_MOV32_U8 || op == SLJIT_MOV32_U16) {
+ /* Only SLJIT_32 is allowed. */
+ CHECK_ARGUMENT(!(op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z)));
+ } else {
+ /* Nothing allowed. */
+ CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+ }
+
+ compiler->last_flags = 0;
+#endif /* SLJIT_ARGUMENT_CHECKS */
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " atomic_load%s%s ", !(op & SLJIT_32) ? "" : "32",
+ op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]);
+ sljit_verbose_reg(compiler, dst_reg);
+ fprintf(compiler->verbose, ", [");
+ sljit_verbose_reg(compiler, mem_reg);
+ fprintf(compiler->verbose, "]\n");
+ }
+#endif /* SLJIT_VERBOSE */
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC));
+ CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P);
+ CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32);
+
+ /* All arguments must be valid registers. */
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_reg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(temp_reg) && src_reg != temp_reg);
+
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == SLJIT_ATOMIC_STORED);
+
+ if (GET_OPCODE(op) == SLJIT_MOV_U8 || GET_OPCODE(op) == SLJIT_MOV_U16) {
+ /* Only SLJIT_32, SLJIT_ATOMIC_STORED are allowed. */
+ CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
+ } else {
+ /* Only SLJIT_ATOMIC_STORED is allowed. */
+ CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z)));
+ }
+
+ compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32);
+#endif /* SLJIT_ARGUMENT_CHECKS */
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " atomic_store%s%s%s ", !(op & SLJIT_32) ? "" : "32",
+ op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & VARIABLE_FLAG_MASK) ? "" : ".stored");
+ sljit_verbose_reg(compiler, src_reg);
+ fprintf(compiler->verbose, ", [");
+ sljit_verbose_reg(compiler, mem_reg);
+ fprintf(compiler->verbose, "], ");
+ sljit_verbose_reg(compiler, temp_reg);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif /* SLJIT_VERBOSE */
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
@@ -1461,28 +1637,33 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
}
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR
|| GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR);
CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_dst));
- FUNCTION_CHECK_SRC(src1, src1w);
- FUNCTION_CHECK_SRC(src2, src2w);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src1_reg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg));
+ FUNCTION_CHECK_SRC(src3, src3w);
+ CHECK_ARGUMENT(dst_reg != src2_reg);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32",
(op & SLJIT_SHIFT_INTO_NON_ZERO) ? ".nz" : "");
- sljit_verbose_reg(compiler, src_dst);
+ sljit_verbose_reg(compiler, dst_reg);
fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src1, src1w);
+ sljit_verbose_reg(compiler, src1_reg);
fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src2, src2w);
+ sljit_verbose_reg(compiler, src2_reg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, src3, src3w);
fprintf(compiler->verbose, "\n");
}
#endif
@@ -1496,19 +1677,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compi
CHECK_ARGUMENT(op >= SLJIT_FAST_RETURN && op <= SLJIT_PREFETCH_ONCE);
FUNCTION_CHECK_SRC(src, srcw);
- if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN)
- {
+ if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN) {
CHECK_ARGUMENT(src != SLJIT_IMM);
compiler->last_flags = 0;
- }
- else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE)
- {
+ } else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE) {
CHECK_ARGUMENT(src & SLJIT_MEM);
}
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %s ", op_src_names[op - SLJIT_OP_SRC_BASE]);
+ fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]);
sljit_verbose_param(compiler, src, srcw);
fprintf(compiler->verbose, "\n");
}
@@ -1516,20 +1694,39 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compi
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg)
+/* Argument checks and verbose trace for single-destination operations.
+ * op must lie in SLJIT_FAST_ENTER..SLJIT_GET_RETURN_ADDRESS and dst/dstw
+ * must pass the generic destination operand check. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- SLJIT_UNUSED_ARG(reg);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS);
+ CHECK_ARGUMENT(op >= SLJIT_FAST_ENTER && op <= SLJIT_GET_RETURN_ADDRESS);
+ FUNCTION_CHECK_DST(dst, dstw);
+
+ /* SLJIT_FAST_ENTER resets the recorded flag state. */
+ if (op == SLJIT_FAST_ENTER)
+ compiler->last_flags = 0;
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* The name table is shared with the single-source operations. */
+ fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]);
+ sljit_verbose_param(compiler, dst, dstw);
+ fprintf(compiler->verbose, "\n");
+ }
#endif
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_s32 reg)
+/* Argument checks for a register index query.
+ * type selects the register class: SLJIT_GP_REGISTER takes the general
+ * purpose ranges, any other accepted type takes the float ranges.
+ * reg must be either an ordinary register number (1..count) or fall in
+ * the temporary register range of its class. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
+ SLJIT_UNUSED_ARG(type);
SLJIT_UNUSED_ARG(reg);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+ if (type == SLJIT_GP_REGISTER) {
+ /* NOTE(review): the temporary range is BASE..BASE+COUNT inclusive, i.e.
+ * COUNT + 1 values. If BASE is the first temporary register this admits
+ * one index too many ('<' would be expected) - confirm against the
+ * SLJIT_TMP_REGISTER_BASE definition. The float branch has the same shape. */
+ CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS)
+ || (reg >= SLJIT_TMP_REGISTER_BASE && reg <= (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS)));
+ } else {
+ /* Accept SLJIT_FLOAT_REGISTER, or a type whose bits from 12 upwards are
+ * 0 or 3..6 (presumably SIMD register size codes - TODO confirm). */
+ CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6)));
+ CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS)
+ || (reg >= SLJIT_TMP_FREGISTER_BASE && reg <= (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)));
+ }
#endif
CHECK_RETURN_OK;
}
@@ -1583,8 +1780,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src, srcw);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1623,8 +1820,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com
CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK)
|| (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL));
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1653,15 +1850,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
- CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src, srcw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
FUNCTION_CHECK_DST(dst, dstw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE],
- (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw",
+ fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64],
(op & SLJIT_32) ? ".f32" : ".f64");
sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, ", ");
@@ -1672,7 +1868,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
@@ -1683,16 +1879,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
- CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_CHECK_SRC(src, srcw);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE],
+ fprintf(compiler->verbose, " %s%s.from.%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE],
(op & SLJIT_32) ? ".f32" : ".f64",
- (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? ".s32" : ".sw");
+ fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64]);
sljit_verbose_fparam(compiler, dst, dstw);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(compiler, src, srcw);
@@ -1707,13 +1902,18 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1729,6 +1929,138 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile
CHECK_RETURN_OK;
}
+/* Argument checks and verbose trace for a two-source floating point
+ * operation whose destination is a float register.
+ * SLJIT_COPYSIGN_F64 is the only opcode accepted. (op & SLJIT_32) is
+ * forwarded to the operand checks and selects the ".f32" / ".f64" suffix
+ * in the trace. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
+ CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_COPYSIGN_F64);
+ /* Both sources are float operands; the destination must be a float register. */
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, op & SLJIT_32));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Trace format: <name>.f32|.f64 dst, src1, src2 */
+ fprintf(compiler->verbose, " %s%s ", fop2r_names[GET_OPCODE(op) - SLJIT_FOP2R_BASE], (op & SLJIT_32) ? ".f32" : ".f64");
+ sljit_verbose_freg(compiler, dst_freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, src1, src1w);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, src2, src2w);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for the fset32 operation.
+ * freg must be a valid float register (checked with the 32 bit selector
+ * set to 1). value is not validated; it is only printed by the trace. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+ /* value is read only inside the SLJIT_VERBOSE block below. */
+ SLJIT_UNUSED_ARG(value);
+
+ /* One-shot bypass: clear the flag and skip both validation and tracing. */
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 1));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " fset32 ");
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", %f\n", value);
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for the fset64 operation.
+ * freg must be a valid float register (checked with the 32 bit selector
+ * set to 0). value is not validated; it is only printed by the trace. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ /* value is read only inside the SLJIT_VERBOSE block below. */
+ SLJIT_UNUSED_ARG(value);
+
+ /* One-shot bypass: clear the flag and skip both validation and tracing. */
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " fset64 ");
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", %f\n", value);
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for copies between a float register
+ * (freg) and a general purpose register or register pair (reg).
+ * The opcode must lie in SLJIT_COPY_TO_F64..SLJIT_COPY_FROM_F64 and may
+ * not request any status flag. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
+ CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_COPY_TO_F64 && GET_OPCODE(op) <= SLJIT_COPY_FROM_F64);
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, op & SLJIT_32));
+
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+ /* On 64 bit targets reg is always checked as a single register. */
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg));
+#else /* !SLJIT_64BIT_ARCHITECTURE */
+ switch (op) {
+ case SLJIT_COPY32_TO_F32:
+ case SLJIT_COPY32_FROM_F32:
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg));
+ break;
+ case SLJIT_COPY_TO_F64:
+ case SLJIT_COPY_FROM_F64:
+ /* The F64 forms also accept a register pair; both halves must be registers. */
+ if (reg & REG_PAIR_MASK) {
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_FIRST(reg)));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_SECOND(reg)));
+
+ /* Copying TO the float register places no further limit on the pair. */
+ if (op == SLJIT_COPY_TO_F64)
+ break;
+
+ /* Copying FROM the float register needs two distinct registers. */
+ CHECK_ARGUMENT(REG_PAIR_FIRST(reg) != REG_PAIR_SECOND(reg));
+ break;
+ }
+
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg));
+ break;
+ }
+#endif /* SLJIT_64BIT_ARCHITECTURE */
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Trace format: copy[32]_to|from_f32|f64 freg, reg  or  freg, {first, second} */
+ fprintf(compiler->verbose, " copy%s_%s_f%s ", (op & SLJIT_32) ? "32" : "",
+ GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? "to" : "from", (op & SLJIT_32) ? "32" : "64");
+
+ sljit_verbose_freg(compiler, freg);
+
+ if (reg & REG_PAIR_MASK) {
+ fprintf(compiler->verbose, ", {");
+ sljit_verbose_reg(compiler, REG_PAIR_FIRST(reg));
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_reg(compiler, REG_PAIR_SECOND(reg));
+ fprintf(compiler->verbose, "}\n");
+ } else {
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_reg(compiler, reg);
+ fprintf(compiler->verbose, "\n");
+ }
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler)
{
SLJIT_UNUSED_ARG(compiler);
@@ -1753,7 +2085,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
#define CHECK_UNORDERED(type, last_flags) \
- ((((type) & 0xff) == SLJIT_UNORDERED || ((type) & 0xff) == SLJIT_ORDERED) && \
+ ((((type) & 0xfe) == SLJIT_ORDERED) && \
((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL)
#else
#define CHECK_UNORDERED(type, last_flags) 0
@@ -1775,11 +2107,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile
if ((type & 0xff) <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) {
- CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY);
+ CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY);
compiler->last_flags = 0;
} else
- CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
- || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ CHECK_ARGUMENT((type & 0xfe) == (compiler->last_flags & 0xff)
|| CHECK_UNORDERED(type, compiler->last_flags));
}
#endif
@@ -1863,10 +2194,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32)));
- CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL
- && ((type & 0xff) <= SLJIT_ORDERED || sljit_cmp_info(type & 0xff)));
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
+ CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL);
+ FUNCTION_FCHECK(src1, src1w, type & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, type & SLJIT_32);
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1961,9 +2291,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
if (type <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else
- CHECK_ARGUMENT(type == (compiler->last_flags & 0xff)
- || (type == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY)
- || (type == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ CHECK_ARGUMENT((type & 0xfe) == (compiler->last_flags & 0xff)
|| CHECK_UNORDERED(type, compiler->last_flags));
FUNCTION_CHECK_DST(dst, dstw);
@@ -1975,7 +2303,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " flags.%s%s%s ",
GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE],
- GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""),
+ GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""),
!(op & SLJIT_SET_Z) ? "" : ".z");
sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, ", %s\n", jump_names[type]);
@@ -1984,9 +2312,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
sljit_s32 cond = type & ~SLJIT_32;
@@ -1995,27 +2324,68 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg));
- if (src != SLJIT_IMM) {
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
- CHECK_ARGUMENT(srcw == 0);
- }
+ FUNCTION_CHECK_SRC(src1, src1w);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg));
if (cond <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
- else
- CHECK_ARGUMENT(cond == (compiler->last_flags & 0xff)
- || (cond == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY)
- || (cond == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) {
+ CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY);
+ compiler->last_flags = 0;
+ } else
+ CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff)
|| CHECK_UNORDERED(cond, compiler->last_flags));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " cmov%s %s, ",
+ fprintf(compiler->verbose, " select%s %s, ",
!(type & SLJIT_32) ? "" : "32",
jump_names[type & ~SLJIT_32]);
sljit_verbose_reg(compiler, dst_reg);
fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src, srcw);
+ sljit_verbose_param(compiler, src1, src1w);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_reg(compiler, src2_reg);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 cond = type & ~SLJIT_32;
+
+ CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL);
+
+ CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, type & SLJIT_32));
+ FUNCTION_FCHECK(src1, src1w, type & SLJIT_32);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, type & SLJIT_32));
+
+ if (cond <= SLJIT_NOT_ZERO)
+ CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+ else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) {
+ CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY);
+ compiler->last_flags = 0;
+ } else
+ CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff)
+ || CHECK_UNORDERED(cond, compiler->last_flags));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " fselect%s %s, ",
+ !(type & SLJIT_32) ? "" : "32",
+ jump_names[type & ~SLJIT_32]);
+ sljit_verbose_freg(compiler, dst_freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, src1, src1w);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_freg(compiler, src2_freg);
fprintf(compiler->verbose, "\n");
}
#endif
@@ -2026,33 +2396,35 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 allowed_flags;
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
if (SLJIT_UNLIKELY(compiler->skip_checks)) {
compiler->skip_checks = 0;
CHECK_RETURN_OK;
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- sljit_s32 allowed_flags;
-
if (type & SLJIT_MEM_UNALIGNED) {
- CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)));
- } else if (type & SLJIT_MEM_UNALIGNED_16) {
- CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32));
+ CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)));
+ } else if (type & SLJIT_MEM_ALIGNED_16) {
+ CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32));
} else {
- CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_UNALIGNED_32));
+ CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_ALIGNED_32));
}
allowed_flags = SLJIT_MEM_UNALIGNED;
switch (type & 0xff) {
+ case SLJIT_MOV_P:
+ case SLJIT_MOV:
+ allowed_flags |= SLJIT_MEM_ALIGNED_32;
+ /* fallthrough */
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV32:
- allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16;
- break;
- case SLJIT_MOV:
- case SLJIT_MOV_P:
- allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32;
+ allowed_flags |= SLJIT_MEM_ALIGNED_16;
break;
}
@@ -2079,15 +2451,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler
else
fprintf(compiler->verbose, " %s%s%s",
(type & SLJIT_MEM_STORE) ? "store" : "load",
- !(type & SLJIT_32) ? "" : "32",
- op1_names[(type & 0xff) - SLJIT_OP1_BASE]);
+ !(type & SLJIT_32) ? "" : "32", op1_types[(type & 0xff) - SLJIT_OP1_BASE]);
if (type & SLJIT_MEM_UNALIGNED)
- printf(".un");
- else if (type & SLJIT_MEM_UNALIGNED_16)
- printf(".un16");
- else if (type & SLJIT_MEM_UNALIGNED_32)
- printf(".un32");
+ printf(".unal");
+ else if (type & SLJIT_MEM_ALIGNED_16)
+ printf(".al16");
+ else if (type & SLJIT_MEM_ALIGNED_32)
+ printf(".al32");
if (reg & REG_PAIR_MASK) {
fprintf(compiler->verbose, " {");
@@ -2140,7 +2511,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_c
fprintf(compiler->verbose, " %s%s%s.%s ",
(type & SLJIT_MEM_STORE) ? "store" : "load",
!(type & SLJIT_32) ? "" : "32",
- op1_names[(type & 0xff) - SLJIT_OP1_BASE],
+ op1_types[(type & 0xff) - SLJIT_OP1_BASE],
(type & SLJIT_MEM_POST) ? "post" : "pre");
sljit_verbose_reg(compiler, reg);
@@ -2157,19 +2528,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile
sljit_s32 mem, sljit_sw memw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64);
if (type & SLJIT_MEM_UNALIGNED) {
- CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)));
- } else if (type & SLJIT_MEM_UNALIGNED_16) {
- CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32));
+ CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)));
+ } else if (type & SLJIT_MEM_ALIGNED_16) {
+ CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32));
} else {
- CHECK_ARGUMENT(type & SLJIT_MEM_UNALIGNED_32);
+ CHECK_ARGUMENT(type & SLJIT_MEM_ALIGNED_32);
CHECK_ARGUMENT(!(type & SLJIT_32));
}
- CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32));
FUNCTION_CHECK_SRC_MEM(mem, memw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -2179,11 +2551,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile
!(type & SLJIT_32) ? "f64" : "f32");
if (type & SLJIT_MEM_UNALIGNED)
- printf(".un");
- else if (type & SLJIT_MEM_UNALIGNED_16)
- printf(".un16");
- else if (type & SLJIT_MEM_UNALIGNED_32)
- printf(".un32");
+ printf(".unal");
+ else if (type & SLJIT_MEM_ALIGNED_16)
+ printf(".al16");
+ else if (type & SLJIT_MEM_ALIGNED_32)
+ printf(".al32");
fprintf(compiler->verbose, " ");
sljit_verbose_freg(compiler, freg);
@@ -2200,10 +2572,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_
sljit_s32 mem, sljit_sw memw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64);
CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0);
FUNCTION_CHECK_SRC_MEM(mem, memw);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -2226,7 +2599,297 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_
}
#endif
CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for a SIMD register load/store.
+ * type packs the register size, element size, alignment (the ELEM2 field,
+ * printed as ".al<bits>") and the SLJIT_SIMD_STORE / SLJIT_SIMD_FLOAT /
+ * SLJIT_SIMD_TEST bits. freg is the SIMD register, srcdst/srcdstw the other
+ * operand. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_STORE)) == 0);
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type));
+ /* The alignment field may be at most the register size for a memory
+ * operand and must be 0 for a register operand. The parentheses around
+ * the conditional are required: '<=' binds tighter than '?:', so without
+ * them the bound was never applied to memory operands. */
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= ((srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_FCHECK(srcdst, srcdstw, 0);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Nothing is printed for SLJIT_SIMD_TEST requests (presumably so the
+ * probe call below stays silent when it re-enters this check). */
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ /* Probe with SLJIT_SIMD_TEST first to report unsupported forms. */
+ if (sljit_emit_simd_mov(compiler, type | SLJIT_SIMD_TEST, freg, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_mem: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ /* Trace format: simd_load|store.<reg bits>.[f]<elem bits>.unal|.al<bits> freg, operand */
+ fprintf(compiler->verbose, " simd_%s.%d.%s%d",
+ (type & SLJIT_SIMD_STORE) ? "store" : "load",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ if ((type & 0x3f000000) == SLJIT_SIMD_MEM_UNALIGNED)
+ fprintf(compiler->verbose, ".unal ");
+ else
+ fprintf(compiler->verbose, ".al%d ", (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, srcdst, srcdstw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for the SIMD replicate operation.
+ * freg is the SIMD register; src/srcw is the value operand, checked as a
+ * float operand when SLJIT_SIMD_FLOAT is set and as a general operand
+ * otherwise. The element size must be strictly smaller than the register
+ * size. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0);
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ /* A float immediate is accepted only when its value word is 0. */
+ if (src == SLJIT_IMM) {
+ CHECK_ARGUMENT(srcw == 0);
+ } else {
+ /* Element size code 2 selects the 32 bit float operand check. */
+ FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
+ }
+ } else if (src != SLJIT_IMM) {
+ /* Integer immediates are not checked further; every other source goes
+ * through the destination-operand check. */
+ FUNCTION_CHECK_DST(src, srcw);
+ }
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Nothing is printed for SLJIT_SIMD_TEST requests (presumably so the
+ * probe call below stays silent when it re-enters this check). */
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ /* Probe with SLJIT_SIMD_TEST first to report unsupported forms. */
+ if (sljit_emit_simd_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_dup: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ /* Trace format: simd_replicate.<reg bits>.[f]<elem bits> freg, src */
+ fprintf(compiler->verbose, " simd_replicate.%d.%s%d ",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ if (type & SLJIT_SIMD_FLOAT)
+ sljit_verbose_fparam(compiler, src, srcw);
+ else
+ sljit_verbose_param(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for moving a single SIMD lane.
+ * freg is the SIMD register, lane_index selects the lane, and
+ * srcdst/srcdstw is the other operand (float operand when SLJIT_SIMD_FLOAT
+ * is set, general operand otherwise). */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO | SLJIT_SIMD_LANE_SIGNED | SLJIT_32)) == 0);
+ /* SLJIT_SIMD_LANE_ZERO cannot be combined with SLJIT_SIMD_STORE. */
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)) != (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO));
+ /* SLJIT_SIMD_LANE_SIGNED is rejected unless SLJIT_SIMD_STORE is also set. */
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_SIGNED)) != SLJIT_SIMD_LANE_SIGNED);
+ /* Float lanes accept neither SLJIT_SIMD_LANE_SIGNED nor SLJIT_32. */
+ CHECK_ARGUMENT(!(type & SLJIT_SIMD_FLOAT) || !(type & (SLJIT_SIMD_LANE_SIGNED | SLJIT_32)));
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ /* SLJIT_32 is only valid for element size codes up to 2. */
+ CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ /* Valid lanes: 0 .. 2^(register size code - element size code) - 1. */
+ CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ /* Element size code 2 selects the 32 bit float operand check. */
+ FUNCTION_FCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
+ } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) {
+ /* Only a non-store request with an immediate operand skips this check. */
+ FUNCTION_CHECK_DST(srcdst, srcdstw);
+ }
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Nothing is printed for SLJIT_SIMD_TEST requests (presumably so the
+ * probe call below stays silent when it re-enters this check). */
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ /* Probe with SLJIT_SIMD_TEST first to report unsupported forms. */
+ if (sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_TEST, freg, lane_index, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_move_lane: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ /* Trace format: simd_load|store_lane[32][_z][_s].<reg bits>.[f]<elem bits> freg[lane], operand */
+ fprintf(compiler->verbose, " simd_%s_lane%s%s%s.%d.%s%d ",
+ (type & SLJIT_SIMD_STORE) ? "store" : "load",
+ (type & SLJIT_32) ? "32" : "",
+ (type & SLJIT_SIMD_LANE_ZERO) ? "_z" : "",
+ (type & SLJIT_SIMD_LANE_SIGNED) ? "_s" : "",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, "[%d], ", lane_index);
+ if (type & SLJIT_SIMD_FLOAT)
+ sljit_verbose_fparam(compiler, srcdst, srcdstw);
+ else
+ sljit_verbose_param(compiler, srcdst, srcdstw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+/* Argument checks and verbose trace for replicating one lane of a SIMD
+ * register. Both freg and src must be float registers, and
+ * src_lane_index must address an existing lane for the given register and
+ * element sizes. */
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0);
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0));
+ /* Valid lanes: 0 .. 2^(register size code - element size code) - 1. */
+ CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ /* Nothing is printed for SLJIT_SIMD_TEST requests (presumably so the
+ * probe call below stays silent when it re-enters this check). */
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ /* Probe with SLJIT_SIMD_TEST first to report unsupported forms. */
+ if (sljit_emit_simd_lane_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, src_lane_index) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_lane_replicate: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ /* Trace format: simd_lane_replicate.<reg bits>.[f]<elem bits> freg, src[lane] */
+ fprintf(compiler->verbose, " simd_lane_replicate.%d.%s%d ",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_freg(compiler, src);
+ fprintf(compiler->verbose, "[%d]\n", src_lane_index);
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_EXTEND_SIGNED)) == 0);
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)) != (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT));
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_extend: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ fprintf(compiler->verbose, " simd_load_extend%s.%d.%s%d.%s%d ",
+ (type & SLJIT_SIMD_EXTEND_SIGNED) ? "_s" : "",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE);
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, freg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_sign: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ fprintf(compiler->verbose, " simd_store_sign%s.%d.%s%d ",
+ (type & SLJIT_32) ? "32" : "",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, dst, dstw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) >= SLJIT_SIMD_OP2_AND && (type & SLJIT_SIMD_TYPE_MASK(0)) <= SLJIT_SIMD_OP2_XOR);
+ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, 0));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_freg, 0));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, 0));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ if (sljit_emit_simd_op2(compiler, type | SLJIT_SIMD_TEST, dst_freg, src1_freg, src2_freg) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_op2: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+ fprintf(compiler->verbose, " simd_%s.%d.%s%d ",
+ simd_op2_names[SLJIT_SIMD_GET_OPCODE(type) - 1],
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, dst_freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_freg(compiler, src1_freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_freg(compiler, src2_freg);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
}
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
@@ -2286,7 +2949,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co
#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */
#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
- SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1), \
+ SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1) && !(SLJIT_CONV_F64_FROM_UW & 0x1), \
invalid_float_opcodes); \
if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \
if (GET_OPCODE(op) == SLJIT_CMP_F64) { \
@@ -2301,48 +2964,22 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co
ADJUST_LOCAL_OFFSET(src, srcw); \
return sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \
} \
- CHECK(check_sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw)); \
+ if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_F64_FROM_S32) { \
+ CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \
+ ADJUST_LOCAL_OFFSET(dst, dstw); \
+ ADJUST_LOCAL_OFFSET(src, srcw); \
+ return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \
+ } \
+ CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \
ADJUST_LOCAL_OFFSET(dst, dstw); \
ADJUST_LOCAL_OFFSET(src, srcw); \
- return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \
+ return sljit_emit_fop1_conv_f64_from_uw(compiler, op, dst, dstw, src, srcw); \
} \
CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \
ADJUST_LOCAL_OFFSET(dst, dstw); \
ADJUST_LOCAL_OFFSET(src, srcw);
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
- || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
- || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) \
- || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
- || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
-
-static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
-{
- struct sljit_label *label;
- struct sljit_jump *jump;
- sljit_s32 op = (type & SLJIT_32) ? SLJIT_MOV32 : SLJIT_MOV;
-
- SLJIT_SKIP_CHECKS(compiler);
- jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
- FAIL_IF(!jump);
-
- SLJIT_SKIP_CHECKS(compiler);
- FAIL_IF(sljit_emit_op1(compiler, op, dst_reg, 0, src, srcw));
-
- SLJIT_SKIP_CHECKS(compiler);
- label = sljit_emit_label(compiler);
- FAIL_IF(!label);
-
- sljit_set_label(jump, label);
- return SLJIT_SUCCESS;
-}
-
-#endif
-
-#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \
- && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6))
static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
@@ -2355,7 +2992,7 @@ static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit
return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), reg, 0, mem, memw);
}
-#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_V5 */
+#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) */
#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \
&& !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
@@ -2401,7 +3038,7 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
# include "sljitNativeX86_common.c"
-#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
# include "sljitNativeARM_32.c"
#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
# include "sljitNativeARM_32.c"
@@ -2417,6 +3054,8 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji
# include "sljitNativeRISCV_common.c"
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
# include "sljitNativeS390X.c"
+#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
+# include "sljitNativeLOONGARCH_64.c"
#endif
static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
@@ -2463,8 +3102,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
return sljit_emit_return_void(compiler);
}
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ && !(defined(SLJIT_CONFIG_LOONGARCH_64) && SLJIT_CONFIG_LOONGARCH_64)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
+
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_fop2(compiler, op, dst_freg, 0, src1, src1w, src2, src2w);
+}
+
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH_64 */
+
#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
- && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
+ && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
+ && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
@@ -2480,18 +3140,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
condition = type & 0xff;
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) {
- if ((src1 & SLJIT_IMM) && !src1w) {
+ if (src1 == SLJIT_IMM && !src1w) {
src1 = src2;
src1w = src2w;
src2 = SLJIT_IMM;
src2w = 0;
}
- if ((src2 & SLJIT_IMM) && !src2w)
+ if (src2 == SLJIT_IMM && !src2w)
return emit_cmp_to0(compiler, type, src1, src1w);
}
#endif
- if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) {
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM && src2 != SLJIT_IMM)) {
/* Immediate is preferred as second argument by most architectures. */
switch (condition) {
case SLJIT_LESS:
@@ -2532,7 +3192,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
if (condition <= SLJIT_NOT_ZERO)
flags = SLJIT_SET_Z;
else
- flags = condition << VARIABLE_FLAG_SHIFT;
+ flags = (condition & 0xfe) << VARIABLE_FLAG_SHIFT;
SLJIT_SKIP_CHECKS(compiler);
PTR_FAIL_IF(sljit_emit_op2u(compiler,
@@ -2544,20 +3204,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
#endif /* !SLJIT_CONFIG_MIPS */
-#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
- return 0;
-
switch (type) {
case SLJIT_UNORDERED_OR_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
- return 0;
+ return 1;
}
- return 1;
+ return 0;
}
#endif /* SLJIT_CONFIG_ARM */
@@ -2570,7 +3227,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile
CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
SLJIT_SKIP_CHECKS(compiler);
- sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w);
+ sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xfe) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w);
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
@@ -2630,507 +3287,174 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
#endif /* !SLJIT_CONFIG_ARM_64 && !SLJIT_CONFIG_PPC */
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
- && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+ && !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
+ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
{
CHECK_ERROR();
- CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
-
- ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
-
- SLJIT_SKIP_CHECKS(compiler);
-
- if (offset != 0)
- return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
- return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
-}
-
-#endif
-
-#else /* SLJIT_CONFIG_UNSUPPORTED */
-
-/* Empty function bodies for those machines, which are not (yet) supported. */
-
-SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
-{
- return "unsupported";
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data)
-{
- SLJIT_UNUSED_ARG(allocator_data);
- SLJIT_UNUSED_ARG(exec_allocator_data);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
-{
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNREACHABLE();
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNREACHABLE();
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(size);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
-SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(verbose);
- SLJIT_UNREACHABLE();
-}
-#endif
-
-SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
-{
- SLJIT_UNUSED_ARG(feature_type);
- SLJIT_UNREACHABLE();
- return 0;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
-{
SLJIT_UNUSED_ARG(type);
- SLJIT_UNREACHABLE();
- return 0;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data)
-{
- SLJIT_UNUSED_ARG(code);
- SLJIT_UNUSED_ARG(exec_allocator_data);
- SLJIT_UNREACHABLE();
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
- sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(options);
- SLJIT_UNUSED_ARG(arg_types);
- SLJIT_UNUSED_ARG(scratches);
- SLJIT_UNUSED_ARG(saveds);
- SLJIT_UNUSED_ARG(fscratches);
- SLJIT_UNUSED_ARG(fsaveds);
- SLJIT_UNUSED_ARG(local_size);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(srcdst);
+ SLJIT_UNUSED_ARG(srcdstw);
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
- sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(options);
- SLJIT_UNUSED_ARG(arg_types);
- SLJIT_UNUSED_ARG(scratches);
- SLJIT_UNUSED_ARG(saveds);
- SLJIT_UNUSED_ARG(fscratches);
- SLJIT_UNUSED_ARG(fsaveds);
- SLJIT_UNUSED_ARG(local_size);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(lane_index);
+ SLJIT_UNUSED_ARG(srcdst);
+ SLJIT_UNUSED_ARG(srcdstw);
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ SLJIT_UNUSED_ARG(src_lane_index);
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(src_dst);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
sljit_s32 src, sljit_sw srcw)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
-{
- SLJIT_UNREACHABLE();
- return reg;
-}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
- void *instruction, sljit_u32 size)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(instruction);
- SLJIT_UNUSED_ARG(size);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(current_flags);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 arg_types)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(arg_types);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(src1);
- SLJIT_UNUSED_ARG(src1w);
- SLJIT_UNUSED_ARG(src2);
- SLJIT_UNUSED_ARG(src2w);
- SLJIT_UNREACHABLE();
- return NULL;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label)
-{
- SLJIT_UNUSED_ARG(jump);
- SLJIT_UNUSED_ARG(label);
- SLJIT_UNREACHABLE();
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
-{
- SLJIT_UNUSED_ARG(jump);
- SLJIT_UNUSED_ARG(target);
- SLJIT_UNREACHABLE();
-}
+ SLJIT_UNUSED_ARG(dst_freg);
+ SLJIT_UNUSED_ARG(src1_freg);
+ SLJIT_UNUSED_ARG(src2_freg);
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label)
-{
- SLJIT_UNUSED_ARG(put_label);
- SLJIT_UNUSED_ARG(label);
- SLJIT_UNREACHABLE();
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 arg_types,
- sljit_s32 src, sljit_sw srcw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(arg_types);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 type)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+#if !(defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86) \
+ && !(defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \
+ && !(defined(SLJIT_CONFIG_S390X) && SLJIT_CONFIG_S390X) \
+ && !(defined(SLJIT_CONFIG_LOONGARCH) && SLJIT_CONFIG_LOONGARCH)
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
+ sljit_s32 op,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 mem_reg)
{
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(dst_reg);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ SLJIT_UNUSED_ARG(mem_reg);
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(reg);
- SLJIT_UNUSED_ARG(mem);
- SLJIT_UNUSED_ARG(memw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(reg);
- SLJIT_UNUSED_ARG(mem);
- SLJIT_UNUSED_ARG(memw);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
+ sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
{
SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(freg);
- SLJIT_UNUSED_ARG(mem);
- SLJIT_UNUSED_ARG(memw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(src_reg);
+ SLJIT_UNUSED_ARG(mem_reg);
+ SLJIT_UNUSED_ARG(temp_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(type);
- SLJIT_UNUSED_ARG(freg);
- SLJIT_UNUSED_ARG(mem);
- SLJIT_UNUSED_ARG(memw);
- SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH */
+
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(offset);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
+ CHECK_ERROR();
+ CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw initval)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(initval);
- SLJIT_UNREACHABLE();
- return NULL;
-}
+ ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(dst);
- SLJIT_UNUSED_ARG(dstw);
- return NULL;
-}
+ SLJIT_SKIP_CHECKS(compiler);
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
-{
- SLJIT_UNUSED_ARG(addr);
- SLJIT_UNUSED_ARG(new_target);
- SLJIT_UNUSED_ARG(executable_offset);
- SLJIT_UNREACHABLE();
+ if (offset != 0)
+ return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+ return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
-{
- SLJIT_UNUSED_ARG(addr);
- SLJIT_UNUSED_ARG(new_constant);
- SLJIT_UNUSED_ARG(executable_offset);
- SLJIT_UNREACHABLE();
-}
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM_64 */
#endif /* !SLJIT_CONFIG_UNSUPPORTED */
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.h b/src/3rdparty/pcre2/src/sljit/sljitLir.h
index c6a0832ef8..2ba6683c74 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.h
@@ -72,6 +72,7 @@
#include "sljitConfigPre.h"
#endif /* SLJIT_HAVE_CONFIG_PRE */
+#include "sljitConfigCPU.h"
#include "sljitConfig.h"
/* The following header file defines useful macros for fine tuning
@@ -107,9 +108,9 @@ extern "C" {
/* Cannot allocate executable memory.
Only sljit_generate_code() returns with this error code. */
#define SLJIT_ERR_EX_ALLOC_FAILED 3
-/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */
+/* Unsupported instruction form. */
#define SLJIT_ERR_UNSUPPORTED 4
-/* An ivalid argument is passed to any SLJIT function. */
+/* An invalid argument is passed to any SLJIT function. */
#define SLJIT_ERR_BAD_ARGUMENT 5
/* --------------------------------------------------------------------- */
@@ -127,40 +128,40 @@ extern "C" {
is the first saved register, the one before the last is the second saved
register, and so on.
- If an architecture provides two scratch and three saved registers,
- its scratch and saved register sets are the following:
+ For example, in an architecture with only five registers (A-E), if two
+ are scratch and three saved registers, they will be defined as follows:
- R0 | | R0 is always a scratch register
- R1 | | R1 is always a scratch register
- [R2] | S2 | R2 and S2 represent the same physical register
- [R3] | S1 | R3 and S1 represent the same physical register
- [R4] | S0 | R4 and S0 represent the same physical register
+ A | R0 | | R0 always represents scratch register A
+ B | R1 | | R1 always represents scratch register B
+ C | [R2] | S2 | R2 and S2 represent the same physical register C
+ D | [R3] | S1 | R3 and S1 represent the same physical register D
+ E | [R4] | S0 | R4 and S0 represent the same physical register E
- Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and
- SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture.
+ Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS will be 2 and
+ SLJIT_NUMBER_OF_SAVED_REGISTERS will be 3.
- Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12
+ Note: For all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12
and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers
are virtual on x86-32. See below.
The purpose of this definition is convenience: saved registers can
- be used as extra scratch registers. For example four registers can
- be specified as scratch registers and the fifth one as saved register
- on the CPU above and any user code which requires four scratch
- registers can run unmodified. The SLJIT compiler automatically saves
- the content of the two extra scratch register on the stack. Scratch
- registers can also be preserved by saving their value on the stack
- but this needs to be done manually.
+ be used as extra scratch registers. For example, building on the
+ previous example, four registers can be specified as scratch registers
+ and the fifth one as saved register, allowing any user code which requires
+ four scratch registers to run unmodified. The SLJIT compiler automatically
+ saves the content of the two extra scratch registers on the stack. Scratch
+ registers can also be preserved by saving their value on the stack but
+ that needs to be done manually.
Note: To emphasize that registers assigned to R2-R4 are saved
registers, they are enclosed by square brackets.
- Note: sljit_emit_enter and sljit_set_context defines whether a register
- is S or R register. E.g: when 3 scratches and 1 saved is mapped
- by sljit_emit_enter, the allowed register set will be: R0-R2 and
- S0. Although S2 is mapped to the same position as R2, it does not
- available in the current configuration. Furthermore the S1 register
- is not available at all.
+ Note: sljit_emit_enter and sljit_set_context define whether a register
+ is an S or R register. E.g. if in the previous example 3 scratches and
+ 1 saved are mapped by sljit_emit_enter, the allowed register set
+ will be: R0-R2 and S0. Although S2 is mapped to the same register
+ as R2, it is not available in that configuration. Furthermore
+ the S1 register cannot be used at all.
*/
/* Scratch registers. */
@@ -209,7 +210,7 @@ extern "C" {
/* The SLJIT_SP provides direct access to the linear stack space allocated by
sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP).
The immediate offset is extended by the relative stack offset automatically.
- The sljit_get_local_base can be used to obtain the real address of a value. */
+ sljit_get_local_base can be used to obtain the real address of a value. */
#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1)
/* Return with machine word. */
@@ -221,7 +222,7 @@ extern "C" {
/* --------------------------------------------------------------------- */
/* Each floating point register can store a 32 or a 64 bit precision
- value. The FR and FS register sets are overlap in the same way as R
+ value. The FR and FS register sets overlap in the same way as R
and S register sets. See above. */
/* Floating point scratch registers. */
@@ -231,6 +232,10 @@ extern "C" {
#define SLJIT_FR3 4
#define SLJIT_FR4 5
#define SLJIT_FR5 6
+#define SLJIT_FR6 7
+#define SLJIT_FR7 8
+#define SLJIT_FR8 9
+#define SLJIT_FR9 10
/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i)
The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */
#define SLJIT_FR(i) (1 + (i))
@@ -242,6 +247,10 @@ extern "C" {
#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3)
#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4)
#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5)
+#define SLJIT_FS6 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 6)
+#define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7)
+#define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8)
+#define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9)
/* All S registers provided by the architecture can be accessed by SLJIT_FS(i)
The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */
#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i))
@@ -260,23 +269,39 @@ extern "C" {
/* The following argument type definitions are used by sljit_emit_enter,
sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
- As for sljit_emit_call and sljit_emit_icall, the first integer argument
+ For sljit_emit_call and sljit_emit_icall, the first integer argument
must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
Similarly the first floating point argument must be placed into SLJIT_FR0,
the second one into SLJIT_FR1, and so on.
- As for sljit_emit_enter, the integer arguments can be stored in scratch
- or saved registers. The first integer argument without _R postfix is
- stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer
- arguments with _R postfix are placed into scratch registers. The index
- of the scratch register is the count of the previous integer arguments
- starting from SLJIT_R0. The floating point arguments are always placed
- into SLJIT_FR0, SLJIT_FR1, and so on.
+ For sljit_emit_enter, the integer arguments can be stored in scratch
+ or saved registers. Scratch registers are identified by a _R suffix.
- Note: if a function is called by sljit_emit_call/sljit_emit_icall and
- an argument is stored in a scratch register by sljit_emit_enter,
- that argument uses the same scratch register index for both
- integer and floating point arguments.
+ If only saved registers are used, then the allocation mirrors what is
+ done for the "call" functions but using saved registers, meaning that
+ the first integer argument goes to SLJIT_S0, the second one goes into
+ SLJIT_S1, and so on.
+
+ If scratch registers are used, then the way the integer registers are
+ allocated changes so that SLJIT_S0, SLJIT_S1, etc. will be assigned
+ only to the arguments not using scratch registers, while SLJIT_R<n>
+ will be used for the ones using scratch registers.
+
+ Furthermore, the index (shown as "n" above) that will be used for the
+ scratch register depends on how many previous integer registers
+ (scratch or saved) were used already, starting with SLJIT_R0.
+ Even though some indexes will likely be skipped, they still need to be
+ accounted for in the scratches parameter of sljit_emit_enter. See below
+ for some examples.
+
+ The floating point arguments always use scratch registers (but not the
+ _R suffix like the integer arguments) and must use SLJIT_FR0, SLJIT_FR1,
+ just like in the "call" functions.
+
+ Note: the mapping for scratch registers is part of the compiler context
+ and therefore a new context after sljit_emit_call/sljit_emit_icall
+ could remove access to some scratch registers that were used as
+ arguments.
Example function definition:
sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a,
@@ -288,29 +313,33 @@ extern "C" {
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4)
Short form of argument type definition:
- SLJIT_ARGS4(32, P, F64, 32, F32)
+ SLJIT_ARGS4(F32, P, F64, 32, F32)
Argument passing:
arg_a must be placed in SLJIT_R0
- arg_c must be placed in SLJIT_R1
arg_b must be placed in SLJIT_FR0
+ arg_c must be placed in SLJIT_R1
arg_d must be placed in SLJIT_FR1
Examples for argument processing by sljit_emit_enter:
- SLJIT_ARGS4(VOID, P, 32_R, F32, W)
+ SLJIT_ARGS4V(P, 32_R, F32, W)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
+ The type of the result is void.
- SLJIT_ARGS4(VOID, W, W_R, W, W_R)
+ SLJIT_ARGS4(F32, W, W_R, W, W_R)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
+ The type of the result is sljit_f32.
- SLJIT_ARGS4(VOID, F64, W, F32, W_R)
+ SLJIT_ARGS4(P, F64, W, F32, P_R)
Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
+ The type of the result is pointer.
Note: it is recommended to pass the scratch arguments first
followed by the saved arguments:
- SLJIT_ARGS4(VOID, W_R, W_R, W, W)
+ SLJIT_ARGS4(W, W_R, W_R, W, W)
Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
+ The type of the result is sljit_sw / sljit_uw.
*/
/* The following flag is only allowed for the integer arguments of
@@ -318,21 +347,21 @@ extern "C" {
stored in a scratch register instead of a saved register. */
#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
-/* Void result, can only be used by SLJIT_ARG_RETURN. */
-#define SLJIT_ARG_TYPE_VOID 0
+/* No return value, only supported by SLJIT_ARG_RETURN. */
+#define SLJIT_ARG_TYPE_RET_VOID 0
/* Machine word sized integer argument or result. */
-#define SLJIT_ARG_TYPE_W 1
+#define SLJIT_ARG_TYPE_W 1
#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 32 bit integer argument or result. */
-#define SLJIT_ARG_TYPE_32 2
+#define SLJIT_ARG_TYPE_32 2
#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
/* Pointer sized integer argument or result. */
-#define SLJIT_ARG_TYPE_P 3
+#define SLJIT_ARG_TYPE_P 3
#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 64 bit floating point argument or result. */
-#define SLJIT_ARG_TYPE_F64 4
+#define SLJIT_ARG_TYPE_F64 4
/* 32 bit floating point argument or result. */
-#define SLJIT_ARG_TYPE_F32 5
+#define SLJIT_ARG_TYPE_F32 5
#define SLJIT_ARG_SHIFT 4
#define SLJIT_ARG_RETURN(type) (type)
@@ -345,24 +374,40 @@ extern "C" {
can be shortened to:
SLJIT_ARGS1(W, F32)
+
+ Another example where no value is returned:
+ SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1)
+
+ can be shortened to:
+ SLJIT_ARGS1V(W_R)
*/
#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type
#define SLJIT_ARGS0(ret) \
SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret))
+#define SLJIT_ARGS0V() \
+ SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID)
#define SLJIT_ARGS1(ret, arg1) \
(SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
+#define SLJIT_ARGS1V(arg1) \
+ (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
#define SLJIT_ARGS2(ret, arg1, arg2) \
(SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
+#define SLJIT_ARGS2V(arg1, arg2) \
+ (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \
(SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
+#define SLJIT_ARGS3V(arg1, arg2, arg3) \
+ (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \
(SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
+#define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \
+ (SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
/* --------------------------------------------------------------------- */
/* Main structures and functions */
@@ -457,7 +502,7 @@ struct sljit_compiler {
sljit_s32 mode32;
#endif
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
/* Constant pool handling. */
sljit_uw *cpool;
sljit_u8 *cpool_unique;
@@ -468,10 +513,10 @@ struct sljit_compiler {
sljit_uw patches;
#endif
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Temporary fields. */
sljit_uw shift_imm;
-#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */
+#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)
sljit_uw args_size;
@@ -501,6 +546,11 @@ struct sljit_compiler {
sljit_s32 mode;
#endif
+#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
+ sljit_s32 cache_arg;
+ sljit_sw cache_argw;
+#endif
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
FILE* verbose;
#endif
@@ -558,8 +608,7 @@ static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *co
after the code is compiled. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler);
-/*
- Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit,
+/* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit,
and <= 128 bytes on 64 bit architectures. The memory area is owned by the
compiler, and freed by sljit_free_compiler. The returned pointer is
sizeof(sljit_sw) aligned. Excellent for allocating small blocks during
@@ -567,19 +616,21 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compi
to contain at most 16 pointers. If the size is outside of the range,
the function will return with NULL. However, this return value does not
indicate that there is no more memory (does not set the current error code
- of the compiler to out-of-memory status).
-*/
+ of the compiler to out-of-memory status). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size);
+/* Returns the allocator data passed to sljit_create_compiler. These pointers
+ may contain context data even if the normal/exec allocator ignores it. */
+static SLJIT_INLINE void* sljit_get_allocator_data(struct sljit_compiler *compiler) { return compiler->allocator_data; }
+static SLJIT_INLINE void* sljit_get_exec_allocator_data(struct sljit_compiler *compiler) { return compiler->exec_allocator_data; }
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
/* Passing NULL disables verbose. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
#endif
-/*
- Create executable code from the instruction stream. This is the final step
- of the code generation so no more instructions can be emitted after this call.
-*/
+/* Create executable code from the instruction stream. This is the final step
+ of the code generation so no more instructions can be emitted after this call. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler);
@@ -587,8 +638,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data);
-/*
- When the protected executable allocator is used the JIT code is mapped
+/* When the protected executable allocator is used the JIT code is mapped
twice. The first mapping has read/write and the second mapping has read/exec
permissions. This function returns with the relative offset of the executable
mapping using the writable mapping as the base after the machine code is
@@ -596,16 +646,13 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_d
allocator, since it uses only one mapping with read/write/exec permissions.
Dynamic code modifications requires this value.
- Before a successful code generation, this function returns with 0.
-*/
+ Before a successful code generation, this function returns with 0. */
static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; }
-/*
- The executable memory consumption of the generated code can be retrieved by
+/* The executable memory consumption of the generated code can be retrieved by
this function. The returned value can be used for statistical purposes.
- Before a successful code generation, this function returns with 0.
-*/
+ Before a successful code generation, this function returns with 0. */
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
/* Returns with non-zero if the feature or limitation type passed as its
@@ -628,30 +675,49 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
#define SLJIT_HAS_CLZ 3
/* [Emulated] Count trailing zero is supported. */
#define SLJIT_HAS_CTZ 4
+/* [Emulated] Reversing the order of bytes is supported. */
+#define SLJIT_HAS_REV 5
/* [Emulated] Rotate left/right is supported. */
-#define SLJIT_HAS_ROT 5
+#define SLJIT_HAS_ROT 6
/* [Emulated] Conditional move is supported. */
-#define SLJIT_HAS_CMOV 6
+#define SLJIT_HAS_CMOV 7
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
-#define SLJIT_HAS_PREFETCH 7
+#define SLJIT_HAS_PREFETCH 8
+/* [Emulated] Copy from/to f32 operation is available (see sljit_emit_fcopy). */
+#define SLJIT_HAS_COPY_F32 9
+/* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */
+#define SLJIT_HAS_COPY_F64 10
+/* [Not emulated] The 64 bit floating point registers can be used as
+ two separate 32 bit floating point registers (e.g. ARM32). The
+ second 32 bit part can be accessed by SLJIT_F64_SECOND. */
+#define SLJIT_HAS_F64_AS_F32_PAIR 11
+/* [Not emulated] Some SIMD operations are supported by the compiler. */
+#define SLJIT_HAS_SIMD 12
+/* [Not emulated] SIMD registers are mapped to a pair of double precision
+ floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to
+ a simd operation represents the same 128 bit register, and both SLJIT_FR0
+ and SLJIT_FR1 are overwritten. */
+#define SLJIT_SIMD_REGS_ARE_PAIRS 13
+/* [Not emulated] Atomic support is available (fine-grained). */
+#define SLJIT_HAS_ATOMIC 14
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-/* [Not emulated] SSE2 support is available on x86. */
-#define SLJIT_HAS_SSE2 100
+/* [Not emulated] AVX support is available on x86. */
+#define SLJIT_HAS_AVX 100
+/* [Not emulated] AVX2 support is available on x86. */
+#define SLJIT_HAS_AVX2 101
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL,
- sljit_cmp_info returns one, if the cpu supports the passed floating
- point comparison type.
+ sljit_cmp_info returns with:
+ zero - if the cpu supports the floating point comparison type
+ one - if the comparison requires two machine instructions
+ two - if the comparison requires more than two machine instructions
- If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns
- one, if the cpu supports checking the unordered comparison result
- regardless of the comparison type passed to the comparison instruction.
- The returned value is always one, if there is at least one type between
- SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info
- returns with a zero value.
+ When the result is non-zero, it is recommended to avoid
+ using the specified comparison type if it is easy to do so.
Otherwise it returns zero. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
@@ -662,7 +728,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
/*
The executable code is a function from the viewpoint of the C
- language. The function calls must obey to the ABI (Application
+ language. The function calls must conform to the ABI (Application
Binary Interface) of the platform, which specify the purpose of
machine registers and stack handling among other things. The
sljit_emit_enter function emits the necessary instructions for
@@ -721,7 +787,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
#define SLJIT_ENTER_REG_ARG 0x00000004
/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
-#define SLJIT_MAX_LOCAL_SIZE 65536
+#define SLJIT_MAX_LOCAL_SIZE 1048576
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
@@ -732,9 +798,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
by sljit_emit_enter. Several functions (such as sljit_emit_return)
requires this context to be able to generate the appropriate code.
However, some code fragments (compiled separately) may have no
- normal entry point so their context is unknown for the compiler.
+ normal entry point so their context is unknown to the compiler.
- The sljit_set_context and sljit_emit_enter have the same arguments,
+ sljit_set_context and sljit_emit_enter have the same arguments,
but sljit_set_context does not generate any machine code.
Note: every call of sljit_emit_enter and sljit_set_context overwrites
@@ -767,28 +833,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw);
-/* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL).
- Both sljit_emit_fast_enter and SLJIT_FAST_RETURN operations preserve the
- values of all registers and stack frame. The return address is stored in the
- dst argument of sljit_emit_fast_enter, and this return address can be passed
- to SLJIT_FAST_RETURN to continue the execution after the fast call.
-
- Fast calls are cheap operations (usually only a single call instruction is
- emitted) but they do not preserve any registers. However the callee function
- can freely use / update any registers and the local area which can be
- efficiently exploited by various optimizations. Registers can be saved
- and restored manually if needed.
-
- Although returning to different address by SLJIT_FAST_RETURN is possible,
- this address usually cannot be predicted by the return address predictor of
- modern CPUs which may reduce performance. Furthermore certain security
- enhancement technologies such as Intel Control-flow Enforcement Technology
- (CET) may disallow returning to a different address.
-
- Flags: - (does not modify flags). */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
-
/*
Source and destination operands for arithmetical instructions
imm - a simple immediate value (cannot be used as a destination)
@@ -816,7 +860,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
Note: Different architectures have different addressing limitations.
A single instruction is enough for the following addressing
- modes. Other adrressing modes are emulated by instruction
+ modes. Other addressing modes are emulated by instruction
sequences. This information could help to improve those code
generators which focuses only a few architectures.
@@ -847,6 +891,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
s390x: [reg+imm], -2^19 <= imm < 2^19
[reg+reg] is supported
Write-back is not supported
+ loongarch: [reg+imm], -2048 <= imm <= 2047
+ [reg+reg] is supported
+ Write-back is not supported
*/
/* Macros for specifying operand types. */
@@ -854,9 +901,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
#define SLJIT_MEM0() (SLJIT_MEM)
#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1))
#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8))
-#define SLJIT_IMM 0x40
+#define SLJIT_IMM 0x7f
#define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8))
+/* Macros for checking operand types (only for valid arguments). */
+#define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM)
+#define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM)
+#define SLJIT_IS_MEM0(arg) ((arg) == SLJIT_MEM)
+#define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1))
+#define SLJIT_IS_MEM2(arg) (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
+#define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM)
+#define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
+
/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
32 bit CPUs. When this option is set for an arithmetic operation, only
the lower 32 bits of the input registers are used, and the CPU status
@@ -1057,27 +1113,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
Note: loads a pointer sized data, useful on x32 mode (a 64 bit mode
on x86-64 which uses 32 bit pointers) or similar compiling modes */
#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8)
-/* Flags: Z
- Note: immediate source argument is not supported */
-#define SLJIT_NOT (SLJIT_OP1_BASE + 9)
-#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32)
/* Count leading zeroes
Flags: - (may destroy flags)
Note: immediate source argument is not supported */
-#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
+#define SLJIT_CLZ (SLJIT_OP1_BASE + 9)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
/* Count trailing zeroes
Flags: - (may destroy flags)
Note: immediate source argument is not supported */
-#define SLJIT_CTZ (SLJIT_OP1_BASE + 11)
+#define SLJIT_CTZ (SLJIT_OP1_BASE + 10)
#define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32)
+/* Reverse the order of bytes
+ Flags: - (may destroy flags)
+ Note: converts between little and big endian formats
+ Note: immediate source argument is not supported */
+#define SLJIT_REV (SLJIT_OP1_BASE + 11)
+#define SLJIT_REV32 (SLJIT_REV | SLJIT_32)
+/* Reverse the order of bytes in the lower 16 bit and extend as unsigned
+ Flags: - (may destroy flags)
+ Note: converts between little and big endian formats
+ Note: immediate source argument is not supported */
+#define SLJIT_REV_U16 (SLJIT_OP1_BASE + 12)
+#define SLJIT_REV32_U16 (SLJIT_REV_U16 | SLJIT_32)
+/* Reverse the order of bytes in the lower 16 bit and extend as signed
+ Flags: - (may destroy flags)
+ Note: converts between little and big endian formats
+ Note: immediate source argument is not supported */
+#define SLJIT_REV_S16 (SLJIT_OP1_BASE + 13)
+#define SLJIT_REV32_S16 (SLJIT_REV_S16 | SLJIT_32)
+/* Reverse the order of bytes in the lower 32 bit and extend as unsigned
+ Flags: - (may destroy flags)
+ Note: converts between little and big endian formats
+ Note: immediate source argument is not supported */
+#define SLJIT_REV_U32 (SLJIT_OP1_BASE + 14)
+/* Reverse the order of bytes in the lower 32 bit and extend as signed
+ Flags: - (may destroy flags)
+ Note: converts between little and big endian formats
+ Note: immediate source argument is not supported */
+#define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15)
+
+/* The following unary operations are supported by using sljit_emit_op2:
+ - binary not: SLJIT_XOR with immediate -1 as src1 or src2
+ - negate: SLJIT_SUB with immediate 0 as src1
+ Note: these operations are optimized by the compiler if the
+ target CPU has specialized instruction forms for them. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw);
/* Starting index of opcodes for sljit_emit_op2. */
-#define SLJIT_OP2_BASE 96
+#define SLJIT_OP2_BASE 64
/* Flags: Z | OVERFLOW | CARRY */
#define SLJIT_ADD (SLJIT_OP2_BASE + 0)
@@ -1174,80 +1260,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
op must be one of the following operations:
SLJIT_SHL or SLJIT_SHL32:
- src_dst <<= src2
- src_dst |= ((src1 >> 1) >> (src2 ^ value_mask))
+ dst_reg = src1_reg << src3
+ dst_reg |= ((src2_reg >> 1) >> (src3 ^ value_mask))
SLJIT_MSHL or SLJIT_MSHL32:
- src2 &= value_mask
+ src3 &= value_mask
perform the SLJIT_SHL or SLJIT_SHL32 operation
SLJIT_LSHR or SLJIT_LSHR32:
- src_dst >>= src2
- src_dst |= ((src1 << 1) << (src2 ^ value_mask))
+ dst_reg = src1_reg >> src3
+ dst_reg |= ((src2_reg << 1) << (src3 ^ value_mask))
SLJIT_MLSHR or SLJIT_MLSHR32:
- src2 &= value_mask
+ src3 &= value_mask
perform the SLJIT_LSHR or SLJIT_LSHR32 operation
op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO
- src_dst must be a register which content is updated after
- the operation is completed
- src1 / src1w contains the bits which shifted into src_dst
- src2 / src2w contains the shift amount
+ dst_reg specifies the destination register, where dst_reg
+ and src2_reg cannot be the same registers
+ src1_reg specifies the source register
+ src2_reg specifies the register which is shifted into src1_reg
+ src3 / src3w contains the shift amount
- Note: a rotate operation can be performed if src_dst and
- src1 are set to the same register
+ Note: a rotate operation is performed if src1_reg and
+ src2_reg are the same registers
Flags: - (may destroy flags) */
-/* The src2 contains a non-zero value. Improves the generated
- code on certain architectures, which provides a small
- performance improvement. */
+/* The src3 operand contains a non-zero value. Improves
+ the generated code on certain architectures, which
+ provides a small performance improvement. */
#define SLJIT_SHIFT_INTO_NON_ZERO 0x200
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w);
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w);
-/* Starting index of opcodes for sljit_emit_op2. */
-#define SLJIT_OP_SRC_BASE 128
+/* Starting index of opcodes for sljit_emit_op_src
+ and sljit_emit_op_dst. */
+#define SLJIT_OP_SRC_DST_BASE 96
-/* Note: src cannot be an immedate value
+/* Fast return, see SLJIT_FAST_CALL for more details.
+ Note: src cannot be an immediate value
Flags: - (does not modify flags) */
-#define SLJIT_FAST_RETURN (SLJIT_OP_SRC_BASE + 0)
+#define SLJIT_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 0)
/* Skip stack frames before fast return.
Note: src cannot be an immedate value
Flags: may destroy flags. */
-#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_BASE + 1)
+#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 1)
/* Prefetch value into the level 1 data cache
Note: if the target CPU does not support data prefetch,
no instructions are emitted.
Note: this instruction never fails, even if the memory address is invalid.
Flags: - (does not modify flags) */
-#define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_BASE + 2)
+#define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_DST_BASE + 2)
/* Prefetch value into the level 2 data cache
Note: same as SLJIT_PREFETCH_L1 if the target CPU
does not support this instruction form.
Note: this instruction never fails, even if the memory address is invalid.
Flags: - (does not modify flags) */
-#define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_BASE + 3)
+#define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_DST_BASE + 3)
/* Prefetch value into the level 3 data cache
Note: same as SLJIT_PREFETCH_L2 if the target CPU
does not support this instruction form.
Note: this instruction never fails, even if the memory address is invalid.
Flags: - (does not modify flags) */
-#define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_BASE + 4)
+#define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_DST_BASE + 4)
/* Prefetch a value which is only used once (and can be discarded afterwards)
Note: same as SLJIT_PREFETCH_L1 if the target CPU
does not support this instruction form.
Note: this instruction never fails, even if the memory address is invalid.
Flags: - (does not modify flags) */
-#define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_BASE + 5)
+#define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_DST_BASE + 5)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw);
+/* Fast enter, see SLJIT_FAST_CALL for more details.
+ Flags: - (does not modify flags) */
+#define SLJIT_FAST_ENTER (SLJIT_OP_SRC_DST_BASE + 6)
+
+/* Copies the return address into dst. The return address is the
+ address where the execution continues after the called function
+ returns (see: sljit_emit_return / sljit_emit_return_void).
+ Flags: - (does not modify flags) */
+#define SLJIT_GET_RETURN_ADDRESS (SLJIT_OP_SRC_DST_BASE + 7)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw);
+
/* Starting index of opcodes for sljit_emit_fop1. */
-#define SLJIT_FOP1_BASE 160
+#define SLJIT_FOP1_BASE 128
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0)
@@ -1270,15 +1373,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5)
#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32)
+/* Flags: - (may destroy flags) */
+#define SLJIT_CONV_F64_FROM_UW (SLJIT_FOP1_BASE + 6)
+#define SLJIT_CONV_F32_FROM_UW (SLJIT_CONV_F64_FROM_UW | SLJIT_32)
+/* Flags: - (may destroy flags) */
+#define SLJIT_CONV_F64_FROM_U32 (SLJIT_FOP1_BASE + 7)
+#define SLJIT_CONV_F32_FROM_U32 (SLJIT_CONV_F64_FROM_U32 | SLJIT_32)
/* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64.
Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
-#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6)
+#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 8)
#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
-#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7)
+#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 9)
#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
-#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8)
+#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 10)
#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1286,7 +1395,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 src, sljit_sw srcw);
/* Starting index of opcodes for sljit_emit_fop2. */
-#define SLJIT_FOP2_BASE 192
+#define SLJIT_FOP2_BASE 160
/* Flags: - (may destroy flags) */
#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0)
@@ -1306,10 +1415,90 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
+/* Starting index of opcodes for sljit_emit_fop2r. */
+#define SLJIT_FOP2R_BASE 168
+
+/* Flags: - (may destroy flags) */
+#define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0)
+#define SLJIT_COPYSIGN_F32 (SLJIT_COPYSIGN_F64 | SLJIT_32)
+
+/* Similar to sljit_emit_fop2, except the destination is always a register. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* Sets a floating point register to an immediate value. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value);
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value);
+
+/* The following opcodes are used by sljit_emit_fcopy(). */
+
+/* 64 bit: copy a 64 bit value from an integer register into a
+ 64 bit floating point register without any modifications.
+ 32 bit: copy a 32 bit register or register pair into a 64 bit
+ floating point register without any modifications. The
+ register, or the first register of the register pair
+ replaces the high order 32 bit of the floating point
+ register. If a register pair is passed, the low
+ order 32 bit is replaced by the second register.
+ Otherwise, the low order 32 bit is unchanged. */
+#define SLJIT_COPY_TO_F64 1
+/* Copy a 32 bit value from an integer register into a 32 bit
+ floating point register without any modifications. */
+#define SLJIT_COPY32_TO_F32 (SLJIT_COPY_TO_F64 | SLJIT_32)
+/* 64 bit: copy the value of a 64 bit floating point register into
+ an integer register without any modifications.
+ 32 bit: copy a 64 bit floating point register into a 32 bit register
+ or a 32 bit register pair without any modifications. The
+ high order 32 bit of the floating point register is copied
+ into the register, or the first register of the register
+ pair. If a register pair is passed, the low order 32 bit
+ is copied into the second register. */
+#define SLJIT_COPY_FROM_F64 2
+/* Copy the value of a 32 bit floating point register into an integer
+ register without any modifications. The register should be processed
+ with 32 bit operations later. */
+#define SLJIT_COPY32_FROM_F32 (SLJIT_COPY_FROM_F64 | SLJIT_32)
+
+/* Special data copy which involves floating point registers.
+
+ op must be between SLJIT_COPY_TO_F64 and SLJIT_COPY32_FROM_F32
+ freg must be a floating point register
+ reg must be a register or register pair */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg);
+
/* Label and jump instructions. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler);
+/* The SLJIT_FAST_CALL is a calling method for creating lightweight function
+ calls. This type of call preserves the values of all registers and the stack
+ frame. Unlike normal function calls, the enter and return operations must
+ be performed by the SLJIT_FAST_ENTER and SLJIT_FAST_RETURN operations
+ respectively. The return address is stored in the dst argument of the
+ SLJIT_FAST_ENTER operation, and this return address should be passed as
+ the src argument for the SLJIT_FAST_RETURN operation to return from the
+ called function.
+
+ Fast calls are cheap operations (usually only a single call instruction is
+ emitted) but they do not preserve any registers. However the callee function
+ can freely use / update any registers and the locals area which can be
+ efficiently exploited by various optimizations. Registers can be saved
+ and restored manually if needed.
+
+ Although returning to different address by SLJIT_FAST_RETURN is possible,
+ this address usually cannot be predicted by the return address predictor of
+ modern CPUs which may reduce performance. Furthermore certain security
+ enhancement technologies such as Intel Control-flow Enforcement Technology
+ (CET) may disallow returning to a different address (indirect jumps
+ can be used instead, see SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN). */
+
/* Invert (negate) conditional type: xor (^) with 0x1 */
/* Integer comparison types. */
@@ -1321,19 +1510,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_LESS 2
#define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS)
#define SLJIT_GREATER_EQUAL 3
-#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL)
+#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_LESS)
#define SLJIT_GREATER 4
#define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER)
#define SLJIT_LESS_EQUAL 5
-#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL)
+#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_GREATER)
#define SLJIT_SIG_LESS 6
#define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS)
#define SLJIT_SIG_GREATER_EQUAL 7
-#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL)
+#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_LESS)
#define SLJIT_SIG_GREATER 8
#define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER)
#define SLJIT_SIG_LESS_EQUAL 9
-#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL)
+#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_GREATER)
#define SLJIT_OVERFLOW 10
#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW)
@@ -1344,70 +1533,74 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY)
#define SLJIT_NOT_CARRY 13
+#define SLJIT_ATOMIC_STORED 14
+#define SLJIT_SET_ATOMIC_STORED SLJIT_SET(SLJIT_ATOMIC_STORED)
+#define SLJIT_ATOMIC_NOT_STORED 15
+
/* Basic floating point comparison types.
Note: when the comparison result is unordered, their behaviour is unspecified. */
-#define SLJIT_F_EQUAL 14
+#define SLJIT_F_EQUAL 16
#define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL)
-#define SLJIT_F_NOT_EQUAL 15
-#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_NOT_EQUAL)
-#define SLJIT_F_LESS 16
+#define SLJIT_F_NOT_EQUAL 17
+#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_EQUAL)
+#define SLJIT_F_LESS 18
#define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS)
-#define SLJIT_F_GREATER_EQUAL 17
-#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_GREATER_EQUAL)
-#define SLJIT_F_GREATER 18
+#define SLJIT_F_GREATER_EQUAL 19
+#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_LESS)
+#define SLJIT_F_GREATER 20
#define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER)
-#define SLJIT_F_LESS_EQUAL 19
-#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_LESS_EQUAL)
+#define SLJIT_F_LESS_EQUAL 21
+#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_GREATER)
/* Jumps when either argument contains a NaN value. */
-#define SLJIT_UNORDERED 20
+#define SLJIT_UNORDERED 22
#define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED)
/* Jumps when neither argument contains a NaN value. */
-#define SLJIT_ORDERED 21
-#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_ORDERED)
+#define SLJIT_ORDERED 23
+#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_UNORDERED)
/* Ordered / unordered floating point comparison types.
Note: each comparison type has an ordered and unordered form. Some
architectures support only one of them (see: sljit_cmp_info). */
-#define SLJIT_ORDERED_EQUAL 22
+#define SLJIT_ORDERED_EQUAL 24
#define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL)
-#define SLJIT_UNORDERED_OR_NOT_EQUAL 23
-#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_NOT_EQUAL)
-#define SLJIT_ORDERED_LESS 24
+#define SLJIT_UNORDERED_OR_NOT_EQUAL 25
+#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL)
+#define SLJIT_ORDERED_LESS 26
#define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS)
-#define SLJIT_UNORDERED_OR_GREATER_EQUAL 25
-#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER_EQUAL)
-#define SLJIT_ORDERED_GREATER 26
+#define SLJIT_UNORDERED_OR_GREATER_EQUAL 27
+#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS)
+#define SLJIT_ORDERED_GREATER 28
#define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER)
-#define SLJIT_UNORDERED_OR_LESS_EQUAL 27
-#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS_EQUAL)
+#define SLJIT_UNORDERED_OR_LESS_EQUAL 29
+#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER)
-#define SLJIT_UNORDERED_OR_EQUAL 28
+#define SLJIT_UNORDERED_OR_EQUAL 30
#define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
-#define SLJIT_ORDERED_NOT_EQUAL 29
-#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_NOT_EQUAL)
-#define SLJIT_UNORDERED_OR_LESS 30
+#define SLJIT_ORDERED_NOT_EQUAL 31
+#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
+#define SLJIT_UNORDERED_OR_LESS 32
#define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
-#define SLJIT_ORDERED_GREATER_EQUAL 31
-#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER_EQUAL)
-#define SLJIT_UNORDERED_OR_GREATER 32
+#define SLJIT_ORDERED_GREATER_EQUAL 33
+#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
+#define SLJIT_UNORDERED_OR_GREATER 34
#define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
-#define SLJIT_ORDERED_LESS_EQUAL 33
-#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS_EQUAL)
+#define SLJIT_ORDERED_LESS_EQUAL 35
+#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
/* Unconditional jump types. */
-#define SLJIT_JUMP 34
-/* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */
-#define SLJIT_FAST_CALL 35
+#define SLJIT_JUMP 36
+/* Fast calling method. See the description above. */
+#define SLJIT_FAST_CALL 37
/* Default C calling convention. */
-#define SLJIT_CALL 36
+#define SLJIT_CALL 38
/* Called function must be compiled by SLJIT.
See SLJIT_ENTER_REG_ARG option. */
-#define SLJIT_CALL_REG_ARG 37
+#define SLJIT_CALL_REG_ARG 39
/* The target can be changed during runtime (see: sljit_set_jump_addr). */
#define SLJIT_REWRITABLE_JUMP 0x1000
@@ -1497,19 +1690,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type);
-/* Emit a conditional mov instruction which moves source to destination,
- if the condition is satisfied. Unlike other arithmetic operations this
- instruction does not support memory access.
+/* Emit a conditional select instruction which moves src1 to dst_reg,
+ if the condition is satisfied, or src2_reg to dst_reg otherwise.
type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
- type can be combined (or'ed) with SLJIT_32
- dst_reg must be a valid register
- src must be a valid register or immediate (SLJIT_IMM)
+ type can be combined (or'ed) with SLJIT_32 to move 32 bit
+ register values instead of word sized ones
+ dst_reg and src2_reg must be valid registers
+ src1 must be a valid operand
+
+ Note: if src1 is a memory operand, its value
+ might be loaded even if the condition is false.
Flags: - (does not modify flags) */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw);
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg);
+
+/* Emit a conditional floating point select instruction which moves
+ src1 to dst_reg, if the condition is satisfied, or src2_reg to
+ dst_reg otherwise.
+
+ type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
+ type can be combined (or'ed) with SLJIT_32 to move 32 bit
+ floating point values instead of 64 bit ones
+ dst_freg and src2_freg must be valid floating point registers
+ src1 must be a valid operand
+
+ Note: if src1 is a memory operand, its value
+ might be loaded even if the condition is false.
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg);
/* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(),
sljit_emit_fmem(), and sljit_emit_fmem_update(). */
@@ -1524,9 +1740,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
/* Load or store data from an unaligned (byte aligned) address. */
#define SLJIT_MEM_UNALIGNED 0x000400
/* Load or store data from a 16 bit aligned address. */
-#define SLJIT_MEM_UNALIGNED_16 0x000800
+#define SLJIT_MEM_ALIGNED_16 0x000800
/* Load or store data from a 32 bit aligned address. */
-#define SLJIT_MEM_UNALIGNED_32 0x001000
+#define SLJIT_MEM_ALIGNED_32 0x001000
/* The following flags are used by sljit_emit_mem_update(),
and sljit_emit_fmem_update(). */
@@ -1544,8 +1760,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
/* The sljit_emit_mem emits instructions for various memory operations:
- When SLJIT_MEM_UNALIGNED / SLJIT_MEM_UNALIGNED_16 /
- SLJIT_MEM_UNALIGNED_32 is set in type argument:
+ When SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 /
+ SLJIT_MEM_ALIGNED_32 is set in type argument:
Emit instructions for unaligned memory loads or stores. When
SLJIT_UNALIGNED is not defined, the only way to access unaligned
memory data is using sljit_emit_mem. Otherwise all operations (e.g.
@@ -1560,8 +1776,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
location specified by the mem/memw arguments, and the end address
of this operation is the starting address of the data transfer
between the second register and memory. The type argument must
- be SLJIT_MOV. The SLJIT_MEM_UNALIGNED* options are allowed for
- this operation.
+ be SLJIT_MOV. The SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_*
+ options are allowed for this operation.
type must be between SLJIT_MOV and SLJIT_MOV_P and can be
combined (or'ed) with SLJIT_MEM_* flags
@@ -1625,6 +1841,286 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw);
+/* The following options are used by several simd operations. */
+
+/* Load data into a simd register, this is the default */
+#define SLJIT_SIMD_LOAD 0x000000
+/* Store data from a simd register */
+#define SLJIT_SIMD_STORE 0x000001
+/* The simd register contains floating point values */
+#define SLJIT_SIMD_FLOAT 0x000400
+/* Tests whether the operation is available */
+#define SLJIT_SIMD_TEST 0x000800
+/* Move data to/from a 64 bit (8 byte) long SIMD register */
+#define SLJIT_SIMD_REG_64 (3 << 12)
+/* Move data to/from a 128 bit (16 byte) long SIMD register */
+#define SLJIT_SIMD_REG_128 (4 << 12)
+/* Move data to/from a 256 bit (32 byte) long SIMD register */
+#define SLJIT_SIMD_REG_256 (5 << 12)
+/* Move data to/from a 512 bit (64 byte) long SIMD register */
+#define SLJIT_SIMD_REG_512 (6 << 12)
+/* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */
+#define SLJIT_SIMD_ELEM_8 (0 << 18)
+/* Element size is 16 bit long, usually cannot be combined with SLJIT_SIMD_FLOAT */
+#define SLJIT_SIMD_ELEM_16 (1 << 18)
+/* Element size is 32 bit long */
+#define SLJIT_SIMD_ELEM_32 (2 << 18)
+/* Element size is 64 bit long */
+#define SLJIT_SIMD_ELEM_64 (3 << 18)
+/* Element size is 128 bit long */
+#define SLJIT_SIMD_ELEM_128 (4 << 18)
+/* Element size is 256 bit long */
+#define SLJIT_SIMD_ELEM_256 (5 << 18)
+
+/* The following options are used by sljit_emit_simd_mov(). */
+
+/* Memory address is unaligned (this is the default) */
+#define SLJIT_SIMD_MEM_UNALIGNED (0 << 24)
+/* Memory address is 16 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_16 (1 << 24)
+/* Memory address is 32 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_32 (2 << 24)
+/* Memory address is 64 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24)
+/* Memory address is 128 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_128 (4 << 24)
+/* Memory address is 256 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_256 (5 << 24)
+/* Memory address is 512 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24)
+
+/* Moves data between a simd register and memory.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* and
+ SLJIT_SIMD_MEM_* options
+ freg is the source or destination simd register
+ of the operation
+ srcdst must be a memory operand or a simd register
+
+ Note:
+ The alignment and element size must be
+ less than or equal to the simd register size.
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw);
+
+/* Replicates a scalar value to all lanes of a simd
+ register.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* options
+ except SLJIT_SIMD_STORE.
+ freg is the destination simd register of the operation
+ src is the value which is replicated
+
+ Note:
+ The src == SLJIT_IMM and srcw == 0 can be used to
+ clear a register even when SLJIT_SIMD_FLOAT is set.
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw);
+
+/* The following options are used by sljit_emit_simd_lane_mov(). */
+
+/* Clear all bits of the simd register before loading the lane. */
+#define SLJIT_SIMD_LANE_ZERO 0x000002
+/* Sign extend the integer value stored from the lane. */
+#define SLJIT_SIMD_LANE_SIGNED 0x000004
+
+/* Moves data between a simd register lane and a register or
+ memory. If the srcdst argument is a register, it must be
+ a floating point register when SLJIT_SIMD_FLOAT is specified,
+ or a general purpose register otherwise.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* options
+ Further options:
+ SLJIT_32 - when SLJIT_SIMD_FLOAT is not set
+ SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE
+ is set and SLJIT_SIMD_FLOAT is not set
+ SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD
+ is specified
+ freg is the source or destination simd register
+ of the operation
+ lane_index is the index of the lane
+ srcdst is the source operand for loads, and
+ destination operand for stores
+
+ Note:
+ The elem size must be lower than register size.
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw);
+
+/* Replicates a scalar value from a lane to all lanes
+ of a simd register.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* options
+ except SLJIT_SIMD_STORE.
+ freg is the destination simd register of the operation
+ src is the simd register which lane is replicated
+ src_lane_index is the lane index of the src register
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index);
+
+/* The following options are used by sljit_emit_simd_extend(). */
+
+/* Sign extend the integer elements */
+#define SLJIT_SIMD_EXTEND_SIGNED 0x000002
+/* Extend data to 16 bit */
+#define SLJIT_SIMD_EXTEND_16 (1 << 24)
+/* Extend data to 32 bit */
+#define SLJIT_SIMD_EXTEND_32 (2 << 24)
+/* Extend data to 64 bit */
+#define SLJIT_SIMD_EXTEND_64 (3 << 24)
+
+/* Extends elements and stores them in a simd register.
+ The extension operation increases the size of the
+ elements (e.g. from 16 bit to 64 bit). For integer
+ values, the extension can be signed or unsigned.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_*, and
+ SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE
+ freg is the destination simd register of the operation
+ src must be a memory operand or a simd register.
+ In the latter case, the source elements are stored
+ in the lower half of the register.
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Extract the highest bit (usually the sign bit) from
+ each element of a vector.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* and SLJIT_32
+ options except SLJIT_SIMD_LOAD
+ freg is the source simd register of the operation
+ dst is the destination operand
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw);
+
+/* The following options are used by sljit_emit_simd_op2(). */
+
+/* Binary 'and' operation */
+#define SLJIT_SIMD_OP2_AND 0x000001
+/* Binary 'or' operation */
+#define SLJIT_SIMD_OP2_OR 0x000002
+/* Binary 'xor' operation */
+#define SLJIT_SIMD_OP2_XOR 0x000003
+
+/* Perform simd operations using simd registers.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* and SLJIT_SIMD_OP2_
+ options except SLJIT_SIMD_LOAD and SLJIT_SIMD_STORE
+ dst_freg is the destination register of the operation
+ src1_freg is the first source register of the operation
+ src2_freg is the second source register of the operation
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg);
+
+/* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair
+ can perform an atomic read-modify-write operation. First, an unsigned
+ value must be loaded from memory using sljit_emit_atomic_load. Then,
+ the updated value must be written back to the same memory location by
+ sljit_emit_atomic_store. A thread can only perform a single atomic
+ operation at a time.
+
+ Note: atomic operations are experimental, and not implemented
+ for all cpus.
+
+ The following conditions must be satisfied, or the operation
+ is undefined:
+ - the address provided in mem_reg must be divisible by the size of
+ the value (only naturally aligned updates are supported)
+ - no memory writes are allowed between the load and store operations
+ regardless of its target address (currently read operations are
+ allowed, but this might change in the future)
+ - the memory operation (op) and the base address (stored in mem_reg)
+ passed to the load/store operations must be the same (the mem_reg
+ can be a different register, only its value must be the same)
+ - a store must always follow a load for the same transaction.
+
+ op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all
+ signed loads such as SLJIT_MOV32_S16
+ dst_reg is the register where the data will be loaded into
+ mem_reg is the base address of the memory load (it cannot be
+ SLJIT_SP or a virtual register on x86-32)
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg);
+
+/* The sljit_emit_atomic_load and sljit_emit_atomic_store operations
+ allow performing an atomic read-modify-write operation. See the
+ description of sljit_emit_atomic_load.
+
+ op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all signed
+ loads such as SLJIT_MOV32_S16
+ src_reg is the register which value is stored into the memory
+ mem_reg is the base address of the memory store (it cannot be
+ SLJIT_SP or a virtual register on x86-32)
+ temp_reg is a not preserved scratch register, which must be
+ initialized with the value loaded into the dst_reg during the
+ corresponding sljit_emit_atomic_load operation, or the operation
+ is undefined
+
+ Flags: ATOMIC_STORED is set if the operation is successful,
+ otherwise the memory remains unchanged. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg);
+
/* Copies the base address of SLJIT_SP + offset to dst. The offset can
represent the starting address of a value in the local data (stack).
The offset is not limited by the local data limits, it can be any value.
@@ -1665,30 +2161,39 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
/* CPU specific functions */
/* --------------------------------------------------------------------- */
+/* Types for sljit_get_register_index */
+
+/* General purpose (integer) registers. */
+#define SLJIT_GP_REGISTER 0
+/* Floating point registers. */
+#define SLJIT_FLOAT_REGISTER 1
+
/* The following function is a helper function for sljit_emit_op_custom.
- It returns with the real machine register index ( >=0 ) of any SLJIT_R,
- SLJIT_S and SLJIT_SP registers.
+ It returns with the real machine register index ( >=0 ) of any registers.
- Note: it returns with -1 for virtual registers (only on x86-32). */
+ When type is SLJIT_GP_REGISTER:
+ reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg);
+ When type is SLJIT_FLOAT_REGISTER:
+ reg must be an SLJIT_FR(i) or SLJIT_FS(i) register
-/* The following function is a helper function for sljit_emit_op_custom.
- It returns with the real machine register ( >= 0 ) index of any SLJIT_FR,
- and SLJIT_FS register.
+ When type is SLJIT_SIMD_REG_64 / 128 / 256 / 512 :
+ reg must be an SLJIT_FR(i) or SLJIT_FS(i) register
- Note: the index is always an even number on ARM-32, MIPS. */
+ Note: it returns with -1 for unknown registers, such as virtual
+ registers on x86-32 or unsupported simd registers. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg);
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg);
/* Any instruction can be inserted into the instruction stream by
sljit_emit_op_custom. It has a similar purpose as inline assembly.
The size parameter must match to the instruction size of the target
architecture:
- x86: 0 < size <= 15. The instruction argument can be byte aligned.
+ x86: 0 < size <= 15, the instruction argument can be byte aligned.
Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
if size == 4, the instruction argument must be 4 byte aligned.
+ s390x: size can be 2, 4, or 6, the instruction argument can be byte aligned.
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1725,7 +2230,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *com
to know the type of the code generator. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void);
-/* Portable helper function to get an offset of a member. */
+/* Portable helper function to get an offset of a member.
+ Same as offsetof() macro defined in stddef.h */
#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10)
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
index 54b8ade063..d44616d800 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
@@ -34,13 +34,16 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
-#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
+#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
+/* Length of an instruction word. */
+typedef sljit_u32 sljit_ins;
+
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
@@ -55,27 +58,39 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
#define CONST_POOL_EMPTY 0xffffffff
#define ALIGN_INSTRUCTION(ptr) \
- (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
+ (sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
#define MAX_DIFFERENCE(max_diff) \
- (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
+ (((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
- 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
+static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
+ 0,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6
+};
+
+static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1
};
-#define RM(rm) ((sljit_uw)reg_map[rm])
-#define RM8(rm) ((sljit_uw)reg_map[rm] << 8)
-#define RD(rd) ((sljit_uw)reg_map[rd] << 12)
-#define RN(rn) ((sljit_uw)reg_map[rn] << 16)
+#define RM(rm) ((sljit_ins)reg_map[rm])
+#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
+#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
+#define RN(rn) ((sljit_ins)reg_map[rn] << 16)
-#define VM(rm) ((sljit_uw)freg_map[rm])
-#define VD(rd) ((sljit_uw)freg_map[rd] << 12)
-#define VN(rn) ((sljit_uw)freg_map[rn] << 16)
+#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
+#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
+#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
/* --------------------------------------------------------------------- */
/* Instruction forms */
@@ -92,16 +107,19 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define AND 0xe0000000
#define B 0xea000000
#define BIC 0xe1c00000
+#define BKPT 0xe1200070
#define BL 0xeb000000
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
#define CMN 0xe1600000
#define CMP 0xe1400000
-#define BKPT 0xe1200070
#define EOR 0xe0200000
#define LDR 0xe5100000
#define LDR_POST 0xe4100000
+#define LDREX 0xe1900f9f
+#define LDREXB 0xe1d00f9f
+#define LDREXH 0xe1f00f9f
#define MOV 0xe1a00000
#define MUL 0xe0000090
#define MVN 0xe1e00000
@@ -109,50 +127,89 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
-#define RBIT 0xe6ff0f30
+#define REV 0xe6bf0f30
+#define REV16 0xe6bf0fb0
#define RSB 0xe0600000
#define RSC 0xe0e00000
#define SBC 0xe0c00000
#define SMULL 0xe0c00090
#define STR 0xe5000000
+#define STREX 0xe1800f90
+#define STREXB 0xe1c00f90
+#define STREXH 0xe1e00f90
#define SUB 0xe0400000
+#define SXTB 0xe6af0070
+#define SXTH 0xe6bf0070
#define TST 0xe1000000
#define UMULL 0xe0800090
+#define UXTB 0xe6ef0070
+#define UXTH 0xe6ff0070
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
+#define VAND 0xf2000110
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
+#define VCVT_F32_U32 0xeeb80a40
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
+#define VDUP 0xee800b10
+#define VDUP_s 0xf3b00c00
+#define VEOR 0xf3000110
+#define VLD1 0xf4200000
+#define VLD1_r 0xf4a00c00
+#define VLD1_s 0xf4a00000
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
+#define VMOV_i 0xf2800010
+#define VMOV_s 0xee000b10
+#define VMOVN 0xf3b20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
+#define VORR 0xf2200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
+#define VSHLL 0xf2800a10
+#define VSHR 0xf2800010
+#define VSRA 0xf2800110
+#define VST1 0xf4000000
+#define VST1_s 0xf4800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
-#define MOVW 0xe3000000
#define MOVT 0xe3400000
-#define SXTB 0xe6af0070
-#define SXTH 0xe6bf0070
-#define UXTB 0xe6ef0070
-#define UXTH 0xe6ff0070
+#define MOVW 0xe3000000
+#define RBIT 0xe6ff0f30
#endif
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
+ fr -= SLJIT_F64_SECOND(0);
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
+ || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
+}
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
/* Pushing the constant pool into the instruction stream. */
- sljit_uw* inst;
+ sljit_ins* inst;
sljit_uw* cpool_ptr;
sljit_uw* cpool_end;
sljit_s32 i;
@@ -162,13 +219,13 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler)
compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
- inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!inst);
compiler->size++;
*inst = 0xff000000 | compiler->cpool_fill;
for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
- inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!inst);
compiler->size++;
*inst = 0;
@@ -177,7 +234,7 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler)
cpool_ptr = compiler->cpool;
cpool_end = cpool_ptr + compiler->cpool_fill;
while (cpool_ptr < cpool_end) {
- inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!inst);
compiler->size++;
*inst = *cpool_ptr++;
@@ -187,23 +244,23 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler)
return SLJIT_SUCCESS;
}
-static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
- sljit_uw* ptr;
+ sljit_ins* ptr;
if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
FAIL_IF(push_cpool(compiler));
- ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!ptr);
compiler->size++;
*ptr = inst;
return SLJIT_SUCCESS;
}
-static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
- sljit_uw* ptr;
+ sljit_ins* ptr;
sljit_uw cpool_index = CPOOL_SIZE;
sljit_uw* cpool_ptr;
sljit_uw* cpool_end;
@@ -239,7 +296,7 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u
}
SLJIT_ASSERT((inst & 0xfff) == 0);
- ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!ptr);
compiler->size++;
*ptr = inst | cpool_index;
@@ -251,14 +308,15 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u
return SLJIT_SUCCESS;
}
-static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
- sljit_uw* ptr;
+ sljit_ins* ptr;
+
if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
FAIL_IF(push_cpool(compiler));
SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
- ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!ptr);
compiler->size++;
*ptr = inst | compiler->cpool_fill;
@@ -305,7 +363,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_
while (last_pc_patch < code_ptr) {
/* Data transfer instruction with Rn == r15. */
- if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
+ if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) {
diff = (sljit_uw)(const_pool - last_pc_patch);
ind = (*last_pc_patch) & 0xfff;
@@ -395,11 +453,11 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc
#else
-static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
- sljit_uw* ptr;
+ sljit_ins* ptr;
- ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!ptr);
compiler->size++;
*ptr = inst;
@@ -421,7 +479,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
if (jump->flags & SLJIT_REWRITABLE_JUMP)
return 0;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (jump->flags & IS_BL)
code_ptr--;
@@ -449,7 +507,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
jump->flags |= PATCH_B;
}
}
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
if (jump->flags & JUMP_ADDR)
diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
else {
@@ -467,16 +525,16 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
jump->flags |= PATCH_B;
return 1;
}
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
return 0;
}
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- sljit_uw *ptr = (sljit_uw *)jump_ptr;
- sljit_uw *inst = (sljit_uw *)ptr[0];
- sljit_uw mov_pc = ptr[1];
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ sljit_ins *ptr = (sljit_ins*)jump_ptr;
+ sljit_ins *inst = (sljit_ins*)ptr[0];
+ sljit_ins mov_pc = ptr[1];
sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);
@@ -491,7 +549,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut
inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
} else {
@@ -502,7 +560,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut
inst[1] = NOP;
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
}
@@ -521,14 +579,14 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut
if (!bl) {
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
} else {
inst[1] = BLX | RM(TMP_REG1);
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
}
@@ -544,8 +602,8 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut
SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
}
}
-#else
- sljit_uw *inst = (sljit_uw*)jump_ptr;
+#else /* !SLJIT_CONFIG_ARM_V6 */
+ sljit_ins *inst = (sljit_ins*)jump_ptr;
SLJIT_UNUSED_ARG(executable_offset);
@@ -560,10 +618,10 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
}
static sljit_uw get_imm(sljit_uw imm);
@@ -572,9 +630,9 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- sljit_uw *ptr = (sljit_uw*)addr;
- sljit_uw *inst = (sljit_uw*)ptr[0];
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ sljit_ins *ptr = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins*)ptr[0];
sljit_uw ldr_literal = ptr[1];
sljit_uw src2;
@@ -590,7 +648,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
return;
@@ -606,7 +664,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
return;
@@ -626,7 +684,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
}
@@ -640,8 +698,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
}
-#else
- sljit_uw *inst = (sljit_uw*)addr;
+#else /* !SLJIT_CONFIG_ARM_V6 */
+ sljit_ins *inst = (sljit_ins*)addr;
SLJIT_UNUSED_ARG(executable_offset);
@@ -656,30 +714,30 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off
if (flush_cache) {
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
- inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+ inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
struct sljit_memory_fragment *buf;
- sljit_uw *code;
- sljit_uw *code_ptr;
- sljit_uw *buf_ptr;
- sljit_uw *buf_end;
+ sljit_ins *code;
+ sljit_ins *code_ptr;
+ sljit_ins *buf_ptr;
+ sljit_ins *buf_end;
sljit_uw size;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_uw addr;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
sljit_uw cpool_size;
sljit_uw cpool_skip_alignment;
sljit_uw cpool_current_index;
- sljit_uw *cpool_start_address;
- sljit_uw *last_pc_patch;
+ sljit_ins *cpool_start_address;
+ sljit_ins *last_pc_patch;
struct future_patch *first_patch;
#endif
@@ -693,25 +751,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
reverse_buf(compiler);
/* Second code generation pass. */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
size = compiler->size + (compiler->patches << 1);
if (compiler->cpool_fill > 0)
size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
size = compiler->size;
-#endif
- code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw), compiler->exec_allocator_data);
+#endif /* SLJIT_CONFIG_ARM_V6 */
+ code = (sljit_ins*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_ins), compiler->exec_allocator_data);
PTR_FAIL_WITH_EXEC_IF(code);
buf = compiler->buf;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
cpool_size = 0;
cpool_skip_alignment = 0;
cpool_current_index = 0;
cpool_start_address = NULL;
first_patch = NULL;
last_pc_patch = code;
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
code_ptr = code;
word_count = 0;
@@ -729,11 +787,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
do {
- buf_ptr = (sljit_uw*)buf->memory;
+ buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
do {
word_count++;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (cpool_size > 0) {
if (cpool_skip_alignment > 0) {
buf_ptr++;
@@ -761,7 +819,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
}
else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
*code_ptr = *buf_ptr++;
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
@@ -771,15 +829,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* These structures are ordered by their address. */
if (jump && jump->addr == word_count) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr--;
jump->addr = (sljit_uw)code_ptr;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
jump->addr = (sljit_uw)(code_ptr - 2);
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr -= 2;
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
jump = jump->next;
}
if (label && label->size == word_count) {
@@ -789,11 +847,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
label = label->next;
}
if (const_ && const_->addr == word_count) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
const_->addr = (sljit_uw)code_ptr;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
const_->addr = (sljit_uw)(code_ptr - 1);
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
@@ -804,9 +862,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr++;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- }
- else {
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ } else {
/* Fortunately, no need to shift. */
cpool_size = *buf_ptr++ & ~PUSH_POOL;
SLJIT_ASSERT(cpool_size > 0);
@@ -814,14 +871,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
if (cpool_current_index > 0) {
/* Unconditional branch. */
- *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
- code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index);
+ *code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
+ code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
}
cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
cpool_current_index = 0;
last_pc_patch = code_ptr;
}
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
} while (buf_ptr < buf_end);
buf = buf->next;
} while (buf);
@@ -831,13 +888,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
SLJIT_ASSERT(cpool_size == 0);
if (compiler->cpool_fill > 0) {
cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
if (cpool_current_index > 0)
- code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index);
+ code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
buf_ptr = compiler->cpool;
buf_end = buf_ptr + compiler->cpool_fill;
@@ -857,7 +914,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = compiler->jumps;
while (jump) {
- buf_ptr = (sljit_uw *)jump->addr;
+ buf_ptr = (sljit_ins*)jump->addr;
if (jump->flags & PATCH_B) {
addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
@@ -872,18 +929,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
}
else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
jump->addr = (sljit_uw)code_ptr;
- code_ptr[0] = (sljit_uw)buf_ptr;
+ code_ptr[0] = (sljit_ins)buf_ptr;
code_ptr[1] = *buf_ptr;
inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
code_ptr += 2;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
-#endif
- }
- else {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#endif /* SLJIT_CONFIG_ARM_V6 */
+ } else {
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (jump->flags & IS_BL)
buf_ptr--;
if (*buf_ptr & (1 << 23))
@@ -891,20 +947,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
else
buf_ptr += 1;
*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
}
jump = jump->next;
}
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
const_ = compiler->consts;
while (const_) {
- buf_ptr = (sljit_uw*)const_->addr;
+ buf_ptr = (sljit_ins*)const_->addr;
const_->addr = (sljit_uw)code_ptr;
- code_ptr[0] = (sljit_uw)buf_ptr;
+ code_ptr[0] = (sljit_ins)buf_ptr;
code_ptr[1] = *buf_ptr;
if (*buf_ptr & (1 << 23))
buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
@@ -916,21 +972,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
const_ = const_->next;
}
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
put_label = compiler->put_labels;
while (put_label) {
addr = put_label->label->addr;
- buf_ptr = (sljit_uw*)put_label->addr;
+ buf_ptr = (sljit_ins*)put_label->addr;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
put_label = put_label->next;
}
@@ -940,8 +996,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->executable_offset = executable_offset;
compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
- code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
- code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
SLJIT_CACHE_FLUSH(code, code_ptr);
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
@@ -952,26 +1008,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+ case SLJIT_HAS_F64_AS_F32_PAIR:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
-#endif
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+ case SLJIT_HAS_SIMD:
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ return 0;
+#else
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+ /* Available by default. */
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+#endif /* SLJIT_CONFIG_ARM_V6 */
+ case SLJIT_SIMD_REGS_ARE_PAIRS:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
- case SLJIT_HAS_CTZ:
+ case SLJIT_HAS_REV:
case SLJIT_HAS_PREFETCH:
-#endif
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
case SLJIT_HAS_CTZ:
+#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
return 2;
-#endif
+#else
+ return 1;
+#endif /* SLJIT_CONFIG_ARM_V6 */
default:
return 0;
@@ -991,17 +1063,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define LOAD_DATA 0x08
/* Flag bits for emit_op. */
-#define ALLOW_IMM 0x10
-#define ALLOW_INV_IMM 0x20
-#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
-#define ALLOW_NEG_IMM 0x40
+#define ALLOW_IMM 0x10
+#define ALLOW_INV_IMM 0x20
+#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
+#define ALLOW_NEG_IMM 0x40
+#define ALLOW_DOUBLE_IMM 0x80
/* s/l - store/load (1 bit)
u/s - signed/unsigned (1 bit)
w/b/h/N - word/byte/half/NOT allowed (2 bit)
Storing signed and unsigned values are the same operations. */
-static const sljit_uw data_transfer_insts[16] = {
+static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
@@ -1022,7 +1095,7 @@ static const sljit_uw data_transfer_insts[16] = {
};
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
- (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg))
+ (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))
/* Normal ldr/str instruction.
Type2: ldrsb, ldrh, ldrsh */
@@ -1032,7 +1105,7 @@ static const sljit_uw data_transfer_insts[16] = {
(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
- ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2))
+ ((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))
/* Flags for emit_op: */
/* Arguments are swapped. */
@@ -1104,12 +1177,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
}
if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
- FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
+ FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
} else {
if (fsaveds > 0)
- FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
+ FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
- FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
+ FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
}
}
@@ -1138,7 +1211,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
else
FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
- | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
+ | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++;
offset += sizeof(sljit_f64) - sizeof(sljit_sw);
break;
@@ -1147,7 +1220,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
else
FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
- | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
+ | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++;
break;
default:
@@ -1164,7 +1237,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (offset < 4 * sizeof(sljit_sw))
FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
else
- FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw))));
+ FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
break;
}
@@ -1217,7 +1290,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#endif
if (local_size > 0)
- FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
+ FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
return SLJIT_SUCCESS;
}
@@ -1234,6 +1307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
+ /* Doubles are saved, so alignment is unaffected. */
if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
size += SSIZE_OF(sw);
@@ -1245,13 +1319,8 @@ static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
{
sljit_uw imm2 = get_imm(imm);
- if (imm2 == 0) {
- imm2 = (imm & ~(sljit_uw)0x3ff) >> 10;
- imm = (imm & 0x3ff) >> 2;
-
- FAIL_IF(push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xb00 | imm2));
- return push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xf00 | (imm & 0xff));
- }
+ if (imm2 == 0)
+ return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
}
@@ -1274,12 +1343,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
- FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
+ FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
} else {
if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
- FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
+ FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
if (fsaveds > 0)
- FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
+ FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
}
local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
@@ -1330,10 +1399,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
if (frame_size == 0)
return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
if (frame_size > 2 * SSIZE_OF(sw))
- return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)(frame_size - (2 * SSIZE_OF(sw))));
+ return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
}
- FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)local_size));
+ FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
tmp = 1;
} else if (frame_size == 0) {
frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
@@ -1349,7 +1418,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
local_size += SSIZE_OF(sw);
if (frame_size > local_size)
- FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size)));
+ FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
else if (frame_size < local_size)
FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));
@@ -1361,11 +1430,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
if (restored_reg != TMP_REG2)
frame_size -= SSIZE_OF(sw);
- return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)frame_size);
+ return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
}
tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
- return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)tmp);
+ return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
}
if (local_size > 0)
@@ -1384,7 +1453,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
FAIL_IF(push_inst(compiler, POP | reg_list));
if (frame_size > 0)
- return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw)));
+ return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));
if (lr_dst != 0)
return SLJIT_SUCCESS;
@@ -1432,7 +1501,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
sljit_s32 is_masked;
sljit_uw shift_type;
- switch (GET_OPCODE(op)) {
+ switch (op) {
case SLJIT_MOV:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
if (dst != src2) {
@@ -1446,17 +1515,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
- if (flags & MOVE_REG_CONV) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- if (op == SLJIT_MOV_U8)
- return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff);
- FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2)));
- return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst));
-#else
+ if (flags & MOVE_REG_CONV)
return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
-#endif
- }
- else if (dst != src2) {
+
+ if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
}
@@ -1465,26 +1527,15 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
- if (flags & MOVE_REG_CONV) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2)));
- return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst));
-#else
+ if (flags & MOVE_REG_CONV)
return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
-#endif
- }
- else if (dst != src2) {
+
+ if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
}
return SLJIT_SUCCESS;
- case SLJIT_NOT:
- if (src2 & SRC2_IMM)
- return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2);
-
- return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));
-
case SLJIT_CLZ:
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
@@ -1493,17 +1544,30 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_CTZ:
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
-#else /* !SLJIT_CONFIG_ARM_V5 */
+#else /* !SLJIT_CONFIG_ARM_V6 */
FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
return push_inst(compiler, CLZ | RD(dst) | RM(dst));
-#endif /* SLJIT_CONFIG_ARM_V5 */
+#endif /* SLJIT_CONFIG_ARM_V6 */
+ case SLJIT_REV:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+ return push_inst(compiler, REV | RD(dst) | RM(src2));
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1);
+ FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
+ if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16))
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM));
@@ -1534,7 +1598,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(!(src2 & SRC2_IMM));
compiler->status_flags_state = 0;
- if (!HAS_FLAGS(op))
+ if (!(flags & SET_FLAGS))
return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));
FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1)));
@@ -1553,25 +1617,28 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_XOR:
- SLJIT_ASSERT(!(flags & INV_IMM));
+ if (flags & INV_IMM) {
+ SLJIT_ASSERT(src2 == SRC2_IMM);
+ return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
+ }
return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SHL:
case SLJIT_MSHL:
shift_type = 0;
- is_masked = GET_OPCODE(op) == SLJIT_MSHL;
+ is_masked = op == SLJIT_MSHL;
break;
case SLJIT_LSHR:
case SLJIT_MLSHR:
shift_type = 1;
- is_masked = GET_OPCODE(op) == SLJIT_MLSHR;
+ is_masked = op == SLJIT_MLSHR;
break;
case SLJIT_ASHR:
case SLJIT_MASHR:
shift_type = 2;
- is_masked = GET_OPCODE(op) == SLJIT_MASHR;
+ is_masked = op == SLJIT_MASHR;
break;
case SLJIT_ROTL:
@@ -1611,7 +1678,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
- | RM8(src2) | (sljit_uw)(shift_type << 5) | 0x10 | RM(src1));
+ | RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
}
#undef EMIT_SHIFT_INS_AND_RETURN
@@ -1628,8 +1695,7 @@ static sljit_uw get_imm(sljit_uw imm)
if (!(imm & 0xff000000)) {
imm <<= 8;
rol = 8;
- }
- else {
+ } else {
imm = (imm << 24) | (imm >> 8);
rol = 0;
}
@@ -1651,22 +1717,19 @@ static sljit_uw get_imm(sljit_uw imm)
if (!(imm & 0x00ffffff))
return SRC2_IMM | (imm >> 24) | (rol << 8);
- else
- return 0;
+ return 0;
}
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
+static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
sljit_uw mask;
sljit_uw imm1;
- sljit_uw imm2;
sljit_uw rol;
/* Step1: Search a zero byte (8 continous zero bit). */
mask = 0xff000000;
rol = 8;
- while(1) {
+ while (1) {
if (!(imm & mask)) {
/* Rol imm by rol. */
imm = (imm << rol) | (imm >> (32 - rol));
@@ -1674,6 +1737,7 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl
rol = 4 + (rol >> 1);
break;
}
+
rol += 2;
mask >>= 2;
if (mask & 0x3) {
@@ -1703,9 +1767,8 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl
if (!(imm & 0xff000000)) {
imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
- imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
- }
- else if (imm & 0xc0000000) {
+ *imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
+ } else if (imm & 0xc0000000) {
imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
imm <<= 8;
rol += 4;
@@ -1726,11 +1789,10 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl
}
if (!(imm & 0x00ffffff))
- imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+ *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
else
return 0;
- }
- else {
+ } else {
if (!(imm & 0xf0000000)) {
imm <<= 4;
rol += 2;
@@ -1756,25 +1818,23 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl
}
if (!(imm & 0x00ffffff))
- imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+ *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
else
return 0;
}
- FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1));
- FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2));
- return 1;
+ return imm1;
}
-#endif
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
sljit_uw tmp;
-
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ sljit_uw imm1, imm2;
+#else /* !SLJIT_CONFIG_ARM_V6 */
if (!(imm & ~(sljit_uw)0xffff))
return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
/* Create imm by 1 inst. */
tmp = get_imm(imm);
@@ -1785,19 +1845,28 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
if (tmp)
return push_inst(compiler, MVN | RD(reg) | tmp);
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
/* Create imm by 2 inst. */
- FAIL_IF(generate_int(compiler, reg, imm, 1));
- FAIL_IF(generate_int(compiler, reg, ~imm, 0));
+ imm1 = compute_imm(imm, &imm2);
+ if (imm1 != 0) {
+ FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
+ return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
+ }
+
+ imm1 = compute_imm(~imm, &imm2);
+ if (imm1 != 0) {
+ FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
+ return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
+ }
/* Load integer. */
return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
if (imm <= 0xffff)
return SLJIT_SUCCESS;
return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
}
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
@@ -1834,13 +1903,13 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
argw &= 0x3;
if (argw != 0 && (mask == 0xff)) {
- FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7)));
+ FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
}
/* Bit 25: RM is offset. */
return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
- RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_uw)argw << 7)));
+ RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
}
arg &= REG_MASK;
@@ -1902,10 +1971,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* We prefers register and simple consts. */
sljit_s32 dst_reg;
- sljit_s32 src1_reg;
+ sljit_s32 src1_reg = 0;
sljit_s32 src2_reg = 0;
sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
sljit_s32 neg_op = 0;
+ sljit_u32 imm2;
+
+ op = GET_OPCODE(op);
+
+ if (flags & SET_FLAGS)
+ inp_flags &= ~ALLOW_DOUBLE_IMM;
if (dst == TMP_REG2)
flags |= UNUSED_RETURN;
@@ -1913,7 +1988,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
if (inp_flags & ALLOW_NEG_IMM) {
- switch (GET_OPCODE(op)) {
+ switch (op) {
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
neg_op = SLJIT_SUB;
@@ -1937,10 +2012,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if (!(inp_flags & ALLOW_IMM))
break;
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
if (src2_reg)
break;
+
if (inp_flags & ALLOW_INV_IMM) {
src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
if (src2_reg) {
@@ -1948,8 +2024,9 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
break;
}
}
+
if (neg_op != 0) {
- src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w);
+ src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
if (src2_reg) {
op = neg_op | GET_ALL_FLAGS(op);
break;
@@ -1957,7 +2034,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
}
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
if (src2_reg) {
flags |= ARGS_SWAPPED;
@@ -1965,6 +2042,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
src1w = src2w;
break;
}
+
if (inp_flags & ALLOW_INV_IMM) {
src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
if (src2_reg) {
@@ -1974,8 +2052,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
break;
}
}
+
if (neg_op >= SLJIT_SUB) {
/* Note: additive operation (commutative). */
+ SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);
+
src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
if (src2_reg) {
src1 = src2;
@@ -1993,8 +2074,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
else if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
src1_reg = TMP_REG1;
- }
- else {
+ } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
src1_reg = TMP_REG1;
}
@@ -2023,8 +2103,62 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
src2_reg = src2;
else if (src2 & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
- else
+ else if (!(inp_flags & ALLOW_DOUBLE_IMM))
FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
+ else {
+ SLJIT_ASSERT(!(flags & SET_FLAGS));
+
+ if (src1_reg == 0) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
+ src1_reg = TMP_REG1;
+ }
+
+ src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);
+
+ if (src2_reg == 0 && neg_op != 0) {
+ src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
+ if (src2_reg != 0)
+ op = neg_op;
+ }
+
+ if (src2_reg == 0) {
+ FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w));
+ src2_reg = TMP_REG2;
+ } else {
+ FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
+ src1_reg = dst_reg;
+ src2_reg = (sljit_s32)imm2;
+
+ if (op == SLJIT_ADDC)
+ op = SLJIT_ADD;
+ else if (op == SLJIT_SUBC)
+ op = SLJIT_SUB;
+ }
+ }
+ }
+
+ if (src1_reg == 0) {
+ SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));
+
+ src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);
+
+ if (src1_reg == 0 && neg_op != 0) {
+ src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
+ if (src1_reg != 0)
+ op = neg_op;
+ }
+
+ if (src1_reg == 0) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
+ src1_reg = TMP_REG1;
+ } else {
+ FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
+ src1_reg = dst_reg;
+ src2_reg = (sljit_s32)imm2;
+
+ if (op == SLJIT_ADDC)
+ op = SLJIT_ADD;
+ }
}
FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
@@ -2114,7 +2248,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
SLJIT_ASSERT(saved_reg_list[1] < 8);
FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
}
- return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8)
+ return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
return SLJIT_SUCCESS;
@@ -2144,23 +2278,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_MOV_U8:
- return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
- case SLJIT_NOT:
- return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
+ return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
+ case SLJIT_REV:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
}
return SLJIT_SUCCESS;
@@ -2171,6 +2309,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ sljit_s32 inp_flags;
+
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
ADJUST_LOCAL_OFFSET(dst, dstw);
@@ -2182,11 +2322,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_ADDC:
case SLJIT_SUB:
case SLJIT_SUBC:
- return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_OR:
+ return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
+
case SLJIT_XOR:
- return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
+ inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
+ if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
+ inp_flags |= ALLOW_INV_IMM;
+ }
+ return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL:
return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
@@ -2202,7 +2348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
compiler->shift_imm = src2w & 0x1f;
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
} else {
@@ -2226,60 +2372,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg, /* register receiving the combined result */
+ sljit_s32 src1_reg, /* register shifted in the direction selected by op */
+ sljit_s32 src2_reg, /* register shifted the opposite way and ORed into the result */
+ sljit_s32 src3, sljit_sw src3w) /* shift amount: immediate, memory or register operand */
{
sljit_s32 is_left;
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
op = GET_OPCODE(op);
is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w); /* identical sources: emitted as a rotate of src1_reg */
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
/* Shift type of ROR is 3. */
- if (src2 & SLJIT_IMM) {
- src2w &= 0x1f;
+ if (src3 == SLJIT_IMM) {
+ src3w &= 0x1f; /* immediate amount is reduced to 0..31 */
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src2, src2w, TMP_REG2));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
- src1 = TMP_REG1;
+ FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7))); /* dst_reg = src1_reg shifted by src3w; shift type 0 when is_left, otherwise 1 */
+ src3w = (src3w ^ 0x1f) + 1; /* equals 32 - src3w for src3w in 1..31 */
+ return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7)); /* dst_reg |= src2_reg shifted the opposite way by the complementary amount */
}
- if (src2 & SLJIT_IMM) {
- FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM(src_dst) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src2w << 7)));
- src2w = (src2w ^ 0x1f) + 1;
- return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src2w << 7));
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2)); /* shift amount held in memory is loaded into TMP_REG2 */
+ src3 = TMP_REG2;
}
- if (op == SLJIT_MSHL || op == SLJIT_MLSHR) {
- FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
- src2 = TMP_REG2;
+ if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) { /* masked opcodes, or dst_reg is also the amount register and is written before the amount is read again below */
+ FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f)); /* TMP_REG2 = amount & 0x1f */
+ src3 = TMP_REG2;
}
- FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM8(src2) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src_dst)));
- FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7)));
- FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
- return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(TMP_REG1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | 0x10 | RM8(TMP_REG2));
+ FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg))); /* dst_reg = src1_reg shifted by the amount register */
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7))); /* TMP_REG1 = src2_reg pre-shifted by one bit in the opposite direction */
+ FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f)); /* TMP_REG2 = amount ^ 0x1f; with the one-bit pre-shift this totals 32 - amount for amounts in 0..31 */
+ return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1)); /* dst_reg |= TMP_REG1 shifted the opposite way by TMP_REG2 */
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2305,27 +2443,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
case SLJIT_PREFETCH_L2:
case SLJIT_PREFETCH_L3:
case SLJIT_PREFETCH_ONCE:
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
SLJIT_ASSERT(src & SLJIT_MEM);
return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
-#else /* !SLJIT_CONFIG_ARM_V7 */
- return SLJIT_SUCCESS;
-#endif /* SLJIT_CONFIG_ARM_V7 */
}
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw) /* Handles SLJIT_FAST_ENTER and SLJIT_GET_RETURN_ADDRESS: the value ends up in dst (register or memory). */
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 size, dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14); /* TMP_REG2 must map to r14, the ARM link register */
+
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2)); /* register destination: plain copy of TMP_REG2 */
+ break; /* memory destination: TMP_REG2 is stored after the switch */
+ case SLJIT_GET_RETURN_ADDRESS:
+ size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0); /* presumably the byte size of the saved general registers; macro is defined outside this view */
+
+ if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
+ /* The size of pc is not added above. */
+ if ((size & SSIZE_OF(sw)) == 0)
+ size += SSIZE_OF(sw); /* one extra word when the word-size bit of size is clear */
+
+ size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64); /* plus the saved float register area */
+ }
+
+ SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0); /* local area + saved area + one word must be a multiple of 8 */
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; /* load straight into dst when it is a register, otherwise via TMP_REG2 */
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1)); /* load the word at [sp + local_size + size] */
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1); /* no LOAD_DATA flag: TMP_REG2 is stored to the memory destination */
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) /* Returns the machine index of sljit register reg for the given register type, or -1 when the type is not handled here. */
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return (freg_map[reg] << 1);
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return reg_map[reg]; /* general purpose registers are looked up in reg_map */
+
+ if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
+ return freg_map[reg]; /* float and 64-bit SIMD registers share the freg_map index */
+
+ if (type == SLJIT_SIMD_REG_128) /* must be ==: with != the 128-bit type fell through to -1 and unknown types received an index */
+ return freg_map[reg] & ~0x1; /* 128-bit SIMD registers use the index with the low bit cleared (even member of the pair) */
+
+ return -1; /* every other register type is reported as unsupported */
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -2335,7 +2513,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
- return push_inst(compiler, *(sljit_uw*)instruction);
+ return push_inst(compiler, *(sljit_ins*)instruction);
}
/* --------------------------------------------------------------------- */
@@ -2344,18 +2522,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
#define FPU_LOAD (1 << 20)
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
- ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs))
+ ((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
sljit_uw imm;
- sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));
+ sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));
SLJIT_ASSERT(arg & SLJIT_MEM);
arg &= ~SLJIT_MEM;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7)));
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
arg = TMP_REG2;
argw = 0;
}
@@ -2410,14 +2588,12 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- op ^= SLJIT_32;
-
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
else if (src & SLJIT_MEM) {
@@ -2429,13 +2605,27 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
}
- FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0)));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));
if (dst & SLJIT_MEM)
- return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
+ return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
return SLJIT_SUCCESS;
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw) /* Thin wrapper: integer-to-float conversion using the VCVT_F32_S32 opcode (signed, judging by the opcode name). */
+{
+ return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw); /* the SLJIT_32 bit of op is inverted before it is merged into the instruction word */
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw) /* Thin wrapper: integer-to-float conversion using the VCVT_F32_U32 opcode (unsigned, judging by the opcode name). */
+{
+ return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw); /* the SLJIT_32 bit of op is inverted before it is merged into the instruction word */
+}
+
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -2453,7 +2643,12 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
}
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
- return push_inst(compiler, VMRS);
+ FAIL_IF(push_inst(compiler, VMRS));
+
+ if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
+ return SLJIT_SUCCESS;
+
+ return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2534,18 +2729,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_ADD_F64:
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
break;
-
case SLJIT_SUB_F64:
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
break;
-
case SLJIT_MUL_F64:
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
break;
-
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
break;
+ case SLJIT_COPYSIGN_F64:
+ FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
+ FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
+ return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
}
if (dst_r == TMP_FREG1)
@@ -2556,42 +2753,120 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
#undef EMIT_FPU_DATA_TRANSFER
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+#if defined(__ARM_NEON) && __ARM_NEON
+ sljit_u32 exp;
+ sljit_ins ins;
+#endif /* NEON */
+ union {
+ sljit_u32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+#if defined(__ARM_NEON) && __ARM_NEON
+ if ((u.imm << (32 - 19)) == 0) {
+ exp = (u.imm >> (23 + 2)) & 0x3f;
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+ if (exp == 0x20 || exp == 0x1f) {
+ ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
+ return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
+ }
+ }
+#endif /* NEON */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
+ return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
+ sljit_u32 exp;
+ sljit_ins ins;
+#endif /* NEON */
+ union {
+ sljit_u32 imm[2];
+ sljit_f64 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
- SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+ u.value = value;
- if (FAST_IS_REG(dst))
- return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
+#if defined(__ARM_NEON) && __ARM_NEON
+ if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
+ exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;
+
+ if (exp == 0x100 || exp == 0xff) {
+ ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
+ return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
+ }
+ }
+#endif /* NEON */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
+ if (u.imm[0] == u.imm[1])
+ return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
+ return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_s32 reg2;
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_SECOND(reg);
+ reg = REG_PAIR_FIRST(reg);
+
+ inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
+ } else {
+ inst = VMOV | VN(freg) | RD(reg);
- /* Memory. */
- return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
+ if (!(op & SLJIT_32))
+ inst |= 1 << 7;
+ }
+
+ if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
+ inst |= 1 << 20;
+
+ return push_inst(compiler, inst);
}
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
-static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
+static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+ case SLJIT_UNORDERED_OR_EQUAL:
return 0x00000000;
case SLJIT_NOT_EQUAL:
+ case SLJIT_ATOMIC_NOT_STORED:
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+ case SLJIT_ORDERED_NOT_EQUAL:
return 0x10000000;
case SLJIT_CARRY:
@@ -2696,7 +2971,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (type >= SLJIT_FAST_CALL)
PTR_FAIL_IF(prepare_blx(compiler));
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
@@ -2714,13 +2989,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
jump->addr = compiler->size;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
if (type >= SLJIT_FAST_CALL)
jump->flags |= IS_BL;
PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
jump->addr = compiler->size;
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
return jump;
}
@@ -2738,7 +3013,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit
sljit_u8 *offset_ptr = offsets;
if (src && FAST_IS_REG(*src))
- src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw);
+ src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);
arg_types >>= SLJIT_ARG_SHIFT;
@@ -2773,7 +3048,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit
if (is_tail_call)
offset += sizeof(sljit_sw);
- offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;
+ offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;
*extra_space = offset;
@@ -2903,8 +3178,6 @@ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit
#endif /* __SOFTFP__ */
-#undef EMIT_FPU_OPERATION
-
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types)
{
@@ -2971,7 +3244,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (FAST_IS_REG(src)) {
SLJIT_ASSERT(reg_map[src] != 14);
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
@@ -2988,16 +3261,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
jump->u.target = (sljit_uw)srcw;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
if (type >= SLJIT_FAST_CALL)
FAIL_IF(prepare_blx(compiler));
FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
if (type >= SLJIT_FAST_CALL)
FAIL_IF(emit_blx(compiler));
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
jump->addr = compiler->size;
return SLJIT_SUCCESS;
}
@@ -3096,7 +3369,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 type)
{
sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
- sljit_uw cc, ins;
+ sljit_ins cc, ins;
CHECK_ERROR();
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
@@ -3132,61 +3405,114 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
- sljit_uw cc, tmp;
+ sljit_ins cc, tmp;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (src2_reg != dst_reg && src1 == dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ src2_reg = dst_reg;
+ type ^= 0x1;
+ }
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));
+
+ if (src2_reg != dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ src1 = TMP_REG1;
+ src1w = 0;
+ }
+ } else if (dst_reg != src2_reg)
+ FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));
cc = get_cc(compiler, type & ~SLJIT_32);
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
- tmp = get_imm((sljit_uw)srcw);
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
+ tmp = get_imm((sljit_uw)src1w);
if (tmp)
return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
- tmp = get_imm(~(sljit_uw)srcw);
+ tmp = get_imm(~(sljit_uw)src1w);
if (tmp)
return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
- tmp = (sljit_uw)srcw;
+ tmp = (sljit_ins)src1w;
FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
if (tmp <= 0xffff)
return SLJIT_SUCCESS;
return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
-#else
- FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
- src = TMP_REG1;
-#endif
+#else /* !SLJIT_CONFIG_ARM_V7 */
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
+ src1 = TMP_REG1;
+#endif /* SLJIT_CONFIG_ARM_V7 */
+ }
+
+ return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_ins cc;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ type ^= SLJIT_32;
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
+ }
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
+ src1 = TMP_FREG1;
}
- return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
+ cc = get_cc(compiler, type & ~SLJIT_32);
+ return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
+#undef EMIT_FPU_OPERATION
+
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
sljit_s32 arg = *mem;
sljit_sw argw = *memw;
sljit_uw imm, tmp;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- sljit_sw mask = max_offset >= 0xf00 ? 0xfff : 0xff;
- sljit_sw sign = max_offset >= 0xf00 ? 0x1000 : 0x100;
-#else /* !SLJIT_CONFIG_ARM_V5 */
sljit_sw mask = 0xfff;
sljit_sw sign = 0x1000;
SLJIT_ASSERT(max_offset >= 0xf00);
-#endif /* SLJIT_CONFIG_ARM_V5 */
*mem = TMP_REG1;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
*memw = 0;
- return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 7));
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
}
arg &= REG_MASK;
@@ -3234,158 +3560,6 @@ static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem
return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-
-static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 reg,
- sljit_s32 mem, sljit_sw memw)
-{
- sljit_s32 flags, steps, tmp_reg;
- sljit_uw add, shift;
-
- switch (type & 0xff) {
- case SLJIT_MOV_U8:
- case SLJIT_MOV_S8:
- flags = BYTE_SIZE;
- if (!(type & SLJIT_MEM_STORE))
- flags |= LOAD_DATA;
- if ((type & 0xff) == SLJIT_MOV_S8)
- flags |= SIGNED;
-
- return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1);
-
- case SLJIT_MOV_U16:
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 1));
- flags = BYTE_SIZE;
- steps = 1;
- break;
-
- case SLJIT_MOV_S16:
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 1));
- flags = BYTE_SIZE | SIGNED;
- steps = 1;
- break;
-
- default:
- if (type & SLJIT_MEM_UNALIGNED_32) {
- flags = WORD_SIZE;
- if (!(type & SLJIT_MEM_STORE))
- flags |= LOAD_DATA;
-
- return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1);
- }
-
- if (!(type & SLJIT_MEM_UNALIGNED_16)) {
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 3));
- flags = BYTE_SIZE;
- steps = 3;
- break;
- }
-
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 2));
-
- add = 1;
- if (memw < 0) {
- add = 0;
- memw = -memw;
- }
-
- tmp_reg = reg;
-
- if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, reg, mem, TYPE2_TRANSFER_IMM(memw))));
- FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (16 << 7) | (2 << 4)));
- } else {
- if (reg == mem) {
- SLJIT_ASSERT(reg != TMP_REG1);
- tmp_reg = TMP_REG1;
- }
-
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, tmp_reg, mem, TYPE2_TRANSFER_IMM(memw))));
- }
-
- if (!add) {
- memw -= 2;
- if (memw <= 0) {
- memw = -memw;
- add = 1;
- }
- } else
- memw += 2;
-
- if (type & SLJIT_MEM_STORE)
- return push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)));
-
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))));
- return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (16 << 7));
- }
-
- SLJIT_ASSERT(steps > 0);
-
- add = 1;
- if (memw < 0) {
- add = 0;
- memw = -memw;
- }
-
- if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, reg, mem, memw)));
- FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (8 << 7) | (2 << 4)));
-
- while (1) {
- if (!add) {
- memw -= 1;
- if (memw == 0)
- add = 1;
- } else
- memw += 1;
-
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, TMP_REG2, mem, memw)));
-
- if (--steps == 0)
- return SLJIT_SUCCESS;
-
- FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(TMP_REG2) | (8 << 7) | (2 << 4)));
- }
- }
-
- tmp_reg = reg;
-
- if (reg == mem) {
- SLJIT_ASSERT(reg != TMP_REG1);
- tmp_reg = TMP_REG1;
- }
-
- shift = 8;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, tmp_reg, mem, memw)));
-
- do {
- if (!add) {
- memw -= 1;
- if (memw == 0)
- add = 1;
- } else
- memw += 1;
-
- if (steps > 1) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, TMP_REG2, mem, memw)));
- FAIL_IF(push_inst(compiler, ORR | RD(tmp_reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7)));
- shift += 8;
- }
- } while (--steps != 0);
-
- flags |= LOAD_DATA;
-
- if (flags & SIGNED)
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))));
- else
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, memw)));
-
- return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7));
-}
-
-#endif /* SLJIT_CONFIG_ARM_V5 */
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
@@ -3395,30 +3569,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
- if (!(reg & REG_PAIR_MASK)) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- ADJUST_LOCAL_OFFSET(mem, memw);
-#endif /* SLJIT_CONFIG_ARM_V5 */
-
+ if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
- }
ADJUST_LOCAL_OFFSET(mem, memw);
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) {
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, (type & SLJIT_MEM_UNALIGNED_16) ? 0xfff - 6 : 0xfff - 7));
-
- if (!(type & SLJIT_MEM_STORE) && REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
- FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw)));
- return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
- }
-
- FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
- return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw));
- }
-#endif /* SLJIT_CONFIG_ARM_V5 */
-
FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
flags = WORD_SIZE;
@@ -3441,7 +3596,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
sljit_s32 mem, sljit_sw memw)
{
sljit_s32 flags;
- sljit_uw is_type1_transfer, inst;
+ sljit_ins is_type1_transfer, inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
@@ -3500,7 +3655,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
memw &= 0x3;
- inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7));
+ inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));
if (is_type1_transfer)
inst |= (1 << 25);
@@ -3526,7 +3681,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
else
memw = -memw;
- return push_inst(compiler, inst | (sljit_uw)memw);
+ return push_inst(compiler, inst | (sljit_ins)memw);
}
if (memw >= 0)
@@ -3534,106 +3689,752 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
else
memw = -memw;
- return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw));
+ return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw)
{
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- sljit_s32 max_offset;
- sljit_s32 dst;
-#endif /* SLJIT_CONFIG_ARM_V5 */
-
CHECK_ERROR();
CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
- if (type & SLJIT_MEM_UNALIGNED_32)
+ if (type & SLJIT_MEM_ALIGNED_32)
return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type & SLJIT_MEM_STORE) {
FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));
if (type & SLJIT_32)
- return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw);
-
- max_offset = 0xfff - 7;
- if (type & SLJIT_MEM_UNALIGNED_16)
- max_offset++;
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset));
+ FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
mem |= SLJIT_MEM;
- FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw));
-
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
- return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw + 4);
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
}
- max_offset = (type & SLJIT_32) ? 0xfff - 3 : 0xfff - 7;
- if (type & SLJIT_MEM_UNALIGNED_16)
- max_offset++;
+ if (type & SLJIT_32) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
+ return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
+ }
+
+ FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
+ mem |= SLJIT_MEM;
+
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
+ return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
+}
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset));
+static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
+{
+ sljit_s32 mem = *mem_ptr;
+ sljit_uw imm;
- dst = TMP_REG1;
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ *mem_ptr = TMP_REG1;
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 7));
+ }
- /* Stack offset adjustment is not needed because dst
- is not stored on the stack when mem is SLJIT_SP. */
+ if (SLJIT_UNLIKELY(!(mem & REG_MASK))) {
+ *mem_ptr = TMP_REG1;
+ return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
+ }
- if (mem == TMP_REG1) {
- dst = SLJIT_R3;
+ mem &= REG_MASK;
- if (compiler->scratches >= 4)
- FAIL_IF(push_inst(compiler, STR | (1 << 21) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8));
+ if (memw == 0) {
+ *mem_ptr = mem;
+ return SLJIT_SUCCESS;
}
- mem |= SLJIT_MEM;
+ *mem_ptr = TMP_REG1;
+ imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));
+
+ if (imm != 0)
+ return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(mem) | imm);
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
+}
+
+static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
+{
+ freg += freg & 0x1;
+
+ SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
+
+ if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
+ freg--;
+
+ return freg;
+}
+
+#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
- FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw));
- FAIL_IF(push_inst(compiler, VMOV | VN(freg) | RD(dst)));
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (!(srcdst & SLJIT_MEM)) {
+ if (reg_size == 4)
+ srcdst = simd_get_quad_reg_index(srcdst);
- if (!(type & SLJIT_32)) {
- FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw + 4));
- FAIL_IF(push_inst(compiler, VMOV | VN(freg) | 0x80 | RD(dst)));
+ if (type & SLJIT_SIMD_STORE)
+ ins = VD(srcdst) | VN(freg) | VM(freg);
+ else
+ ins = VD(freg) | VN(srcdst) | VM(srcdst);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst(compiler, VORR | ins);
}
- if (dst == SLJIT_R3 && compiler->scratches >= 4)
- FAIL_IF(push_inst(compiler, (LDR ^ (0x1 << 24)) | (0x1 << 23) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8));
- return SLJIT_SUCCESS;
-#else /* !SLJIT_CONFIG_ARM_V5 */
- if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
- if (type & SLJIT_32)
- return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);
+ if (elem_size > 3)
+ elem_size = 3;
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
- mem |= SLJIT_MEM;
+ ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg)
+ | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
- FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
- return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
+ SLJIT_ASSERT(reg_size >= alignment);
+
+ if (alignment == 3)
+ ins |= 0x10;
+ else if (alignment >= 3)
+ ins |= 0x20;
+
+ return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
+}
+
+static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
+{
+ sljit_ins result;
+
+ if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
+ elem_size = 1;
+ value = (sljit_u16)value;
}
- if (type & SLJIT_32) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
- return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
+ if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
+ elem_size = 0;
+ value = (sljit_u8)value;
}
- FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
- mem |= SLJIT_MEM;
+ switch (elem_size) {
+ case 0:
+ SLJIT_ASSERT(value <= 0xff);
+ result = 0xe00;
+ break;
+ case 1:
+ SLJIT_ASSERT(value <= 0xffff);
+ result = 0;
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
- return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
-#endif /* SLJIT_CONFIG_ARM_V5 */
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x800;
+ break;
+ }
+
+ if ((value & 0xff) == 0) {
+ value >>= 8;
+ result |= 0xa00;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffff;
+ result = (1 << 5);
+ }
+ break;
+ default:
+ SLJIT_ASSERT(value <= 0xffffffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff00) == 0) {
+ value >>= 8;
+ result |= 0x200;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff0000) == 0) {
+ value >>= 16;
+ result |= 0x400;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff000000) == 0) {
+ value >>= 24;
+ result |= 0x600;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
+ value >>= 8;
+ result |= 0xc00;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
+ value >>= 16;
+ result |= 0xd00;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value = ~value;
+ result = (1 << 5);
+ }
+ break;
+ }
+
+ return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imm;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (src == SLJIT_IMM && srcw == 0)
+ return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));
+
+ if (SLJIT_UNLIKELY(elem_size == 3)) {
+ SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
+ src = freg;
+ } else if (freg != src)
+ FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (freg != src)
+ return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
+ return SLJIT_SUCCESS;
+ }
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ ins = (sljit_ins)(elem_size << 6);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 5;
+
+ return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf);
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ SLJIT_ASSERT(elem_size == 2);
+ ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
+ }
+
+ if (src == SLJIT_IMM) {
+ if (elem_size < 2)
+ srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ imm = simd_get_imm(elem_size, (sljit_uw)srcw);
+
+ if (imm != ~(sljit_ins)0) {
+ if (reg_size == 4)
+ imm |= (sljit_ins)1 << 6;
+
+ return push_inst(compiler, VMOV_i | imm | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
+ src = TMP_REG1;
+ }
+
+ switch (elem_size) {
+ case 0:
+ ins = 1 << 22;
+ break;
+ case 1:
+ ins = 1 << 5;
+ break;
+ default:
+ ins = 0;
+ break;
+ }
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 21;
+
+ return push_inst(compiler, VDUP | ins | VN(freg) | RD(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
+ if (lane_index == 1)
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (srcdst != freg)
+ FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VMOV_i | VD(freg));
+ }
+
+ if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
+ FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg)));
+ srcdst = TMP_FREG2;
+ srcdstw = 0;
+ }
+ }
+
+ FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
+ }
+
+ if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
+ lane_index -= (0x8 >> elem_size);
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ if (elem_size == 3)
+ return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);
+
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ lane_index = lane_index << elem_size;
+ ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
+ return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf);
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3) {
+ if (type & SLJIT_SIMD_STORE)
+ return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg));
+ return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst));
+ }
+
+ if (type & SLJIT_SIMD_STORE) {
+ if (freg_ebit_map[freg] == 0) {
+ if (lane_index == 1)
+ freg = SLJIT_F64_SECOND(freg);
+
+ return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
+ }
+
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
+ return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
+ }
+
+ FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
+ return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size < 2)
+ srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
+ srcdst = TMP_REG1;
+ }
+
+ if (elem_size == 0)
+ ins = 0x400000;
+ else if (elem_size == 1)
+ ins = 0x20;
+ else
+ ins = 0;
+
+ lane_index = lane_index << elem_size;
+ ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));
+
+ if (type & SLJIT_SIMD_STORE) {
+ ins |= (1 << 20);
+
+ if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
+ ins |= (1 << 23);
+ }
+
+ return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
+}
+/* Emits code that fills the vector register freg with copies of lane
+   src_lane_index of the vector register src.
+   Returns SLJIT_ERR_UNSUPPORTED unless the register size is 3 or 4 and, for
+   SLJIT_SIMD_FLOAT types, the element size is 2 or 3. When SLJIT_SIMD_TEST is
+   set, only these checks run and SLJIT_SUCCESS is returned without emitting
+   any instruction. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+ /* reg_size 4: convert both registers with simd_get_quad_reg_index. A lane
+    index of (0x8 >> elem_size) or more is rebased by that amount and src is
+    moved to its other half via SLJIT_QUAD_OTHER_HALF. */
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ src = simd_get_quad_reg_index(src);
+
+ if (src_lane_index >= (0x8 >> elem_size)) {
+ src_lane_index -= (0x8 >> elem_size);
+ src += SLJIT_QUAD_OTHER_HALF(src);
+ }
+ }
+ /* elem_size 3: src is copied with VORR into freg and then into freg's other
+    half; a copy whose destination already equals src is skipped. */
+ if (elem_size == 3) {
+ if (freg != src)
+ FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (freg != src)
+ return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
+ return SLJIT_SUCCESS;
+ }
+ /* The lane index followed by a single 1 bit is placed at bit (16 + elem_size). */
+ ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));
+ /* NOTE(review): bit 6 is presumably the quad-register flag of VDUP_s - confirm. */
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
+}
+/* Emits code that widens the elements of src from elem_size to elem2_size and
+   places the result in freg; src may be a register or a memory operand.
+   Non-float types are widened with VSHLL, where bit 24 is set whenever
+   SLJIT_SIMD_EXTEND_SIGNED is absent. SLJIT_SIMD_FLOAT is accepted only for
+   elem_size 2 with elem2_size 3 and is converted with VCVT_F64_F32.
+   Returns SLJIT_ERR_UNSUPPORTED for other register sizes or float element
+   sizes; with SLJIT_SIMD_TEST set nothing is emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_s32 dst_reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+ /* Memory source: load the narrow elements into freg and extend from freg.
+    Register source with reg_size 4: convert its index like freg above. */
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
+ else
+ FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
+ src = freg;
+ } else if (reg_size == 4)
+ src = simd_get_quad_reg_index(src);
+ /* Non-float path. For reg_size 3 the widening is performed in TMP_FREG2 and
+    the result is copied to freg with VORR afterwards. */
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ dst_reg = (reg_size == 4) ? freg : TMP_FREG2;
+ /* One VSHLL per step until elem_size reaches elem2_size; after the first
+    step every VSHLL reads the previous result from dst_reg. */
+ do {
+ FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
+ | ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
+ src = dst_reg;
+ } while (++elem_size < elem2_size);
+
+ if (dst_reg == TMP_FREG2)
+ return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2));
+ return SLJIT_SUCCESS;
+ }
+
+ /* No SIMD variant, must use VFP instead. */
+ SLJIT_ASSERT(reg_size == 4);
+ /* freg == src: the other half is written first, presumably so that src is
+    still intact when the second conversion reads it - TODO confirm. */
+ if (freg == src) {
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
+ }
+ /* Otherwise freg is written first and its other half second; in both orders
+    the conversion that targets the other half carries the extra 0x20 bit. */
+ FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
+}
+/* Emits code that derives an integer value from the vector register freg and
+   writes it to dst, which is either a register or a memory operand reached
+   through TMP_REG1.
+   NOTE(review): presumably the value is a bit mask built from the sign bit of
+   every element - confirm against the sljit_emit_simd_sign API description.
+   Returns SLJIT_ERR_UNSUPPORTED unless the register size is 3 or 4 and, for
+   SLJIT_SIMD_FLOAT types, the element size is 2 or 3; with SLJIT_SIMD_TEST set
+   nothing is emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+ /* Select, per element and register size, the first VSHR encoding (ins) and
+    a packed list of immediates (imms) that is consumed below one byte at a
+    time, lowest byte first. */
+ switch (elem_size) {
+ case 0:
+ imms = 0x243219;
+ ins = VSHR | (1 << 24) | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x243219 : 0x2231;
+ ins = VSHR | (1 << 24) | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x2231 : 0x21;
+ ins = VSHR | (1 << 24) | (0x21 << 16);
+ break;
+ default:
+ imms = 0x21;
+ ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
+ break;
+ }
+
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ ins |= (sljit_ins)1 << 6;
+ }
+ /* The first shift reads freg and leaves its result in TMP_FREG2. */
+ SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
+ FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg)));
+ /* reg_size 4 with elements wider than one byte: TMP_FREG2 is passed through VMOVN. */
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));
+
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
+ /* One VSRA on TMP_FREG2 for each byte of imms except the last remaining one. */
+ while (imms >= 0x100) {
+ FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
+ imms >>= 8;
+ }
+ /* The last byte of imms is emitted with bit 7 additionally set. */
+ FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
+ /* Transfer from TMP_FREG2 into dst_r, which is TMP_REG1 when dst is not a register. */
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));
+ /* reg_size 4 with byte elements: a second value is read from TMP_FREG1
+    (asserted to follow TMP_FREG2 in freg_map) into TMP_REG2 and OR-ed into
+    dst_r using the (0x8 << 7) operand modifier, presumably a left shift by 8. */
+ if (reg_size == 4 && elem_size == 0) {
+ SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
+ FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
+ }
+ /* dst is not a register: pass TMP_REG1 to emit_op_mem for the dst operand
+    (presumably a store, since no LOAD_DATA flag is given - confirm). */
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
+/* Emits a bitwise vector operation dst_freg = src1_freg <op> src2_freg, where
+   <op> is VAND, VORR or VEOR for SLJIT_SIMD_OP2_AND, SLJIT_SIMD_OP2_OR and
+   SLJIT_SIMD_OP2_XOR. Returns SLJIT_ERR_UNSUPPORTED unless the register size
+   is 3 or 4 and, for SLJIT_SIMD_FLOAT types, the element size is 2 or 3.
+   With SLJIT_SIMD_TEST set nothing is emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+	sljit_s32 vec_size = SLJIT_SIMD_GET_REG_SIZE(type);
+	sljit_s32 lane_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+	sljit_s32 opcode;
+	sljit_ins encoding;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+	if (!(vec_size == 3 || vec_size == 4))
+		return SLJIT_ERR_UNSUPPORTED;
+
+	if ((type & SLJIT_SIMD_FLOAT) && !(lane_size == 2 || lane_size == 3))
+		return SLJIT_ERR_UNSUPPORTED;
+
+	/* Unknown opcodes leave the base encoding at zero. */
+	opcode = SLJIT_SIMD_GET_OPCODE(type);
+
+	if (opcode == SLJIT_SIMD_OP2_AND)
+		encoding = VAND;
+	else if (opcode == SLJIT_SIMD_OP2_OR)
+		encoding = VORR;
+	else if (opcode == SLJIT_SIMD_OP2_XOR)
+		encoding = VEOR;
+	else
+		encoding = 0;
+
+	if (type & SLJIT_SIMD_TEST)
+		return SLJIT_SUCCESS;
+
+	/* Register size 4: convert all three register indexes and set bit 6. */
+	if (vec_size == 4) {
+		dst_freg = simd_get_quad_reg_index(dst_freg);
+		src1_freg = simd_get_quad_reg_index(src1_freg);
+		src2_freg = simd_get_quad_reg_index(src2_freg);
+		encoding |= (sljit_ins)1 << 6;
+	}
+
+	return push_inst(compiler, encoding | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
 }
#undef FPU_LOAD
+/* Emits an exclusive load from the address in mem_reg into dst_reg: LDREXB
+   for SLJIT_MOV_U8, LDREXH for SLJIT_MOV_U16 and LDREX for any other opcode. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst_reg,
+	sljit_s32 mem_reg)
+{
+	sljit_u32 load_ins;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+	if (GET_OPCODE(op) == SLJIT_MOV_U8)
+		load_ins = LDREXB;
+	else if (GET_OPCODE(op) == SLJIT_MOV_U16)
+		load_ins = LDREXH;
+	else
+		load_ins = LDREX;
+
+	return push_inst(compiler, load_ins | RN(mem_reg) | RD(dst_reg));
+}
+
+/* Emits an exclusive store of src_reg to the address in mem_reg: STREXB for
+   SLJIT_MOV_U8, STREXH for SLJIT_MOV_U16 and STREX for any other opcode. The
+   instruction's RD operand is TMP_REG1; when SLJIT_SET_ATOMIC_STORED is
+   requested, a flag-setting CMP on TMP_REG1 follows. temp_reg is not used. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src_reg,
+	sljit_s32 mem_reg,
+	sljit_s32 temp_reg)
+{
+	sljit_u32 store_ins;
+
+	/* temp_reg == mem_reg is undefined so use another temp register */
+	SLJIT_UNUSED_ARG(temp_reg);
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+	if (GET_OPCODE(op) == SLJIT_MOV_U8)
+		store_ins = STREXB;
+	else if (GET_OPCODE(op) == SLJIT_MOV_U16)
+		store_ins = STREXH;
+	else
+		store_ins = STREX;
+
+	FAIL_IF(push_inst(compiler, store_ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
+
+	/* Nothing more to emit unless the caller asked for the flags. */
+	if (!(op & SLJIT_SET_ATOMIC_STORED))
+		return SLJIT_SUCCESS;
+
+	return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
@@ -3645,13 +4446,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
- EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value));
+ EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
compiler->patches++;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
PTR_FAIL_IF(!const_);
@@ -3673,12 +4474,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
compiler->patches++;
-#else
+#else /* !SLJIT_CONFIG_ARM_V6 */
PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 */
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
index 89f747e7c8..b268582f42 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
@@ -67,79 +67,123 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 /* Instruction forms */
/* --------------------------------------------------------------------- */
-#define ADC 0x9a000000
-#define ADD 0x8b000000
-#define ADDE 0x8b200000
-#define ADDI 0x91000000
-#define AND 0x8a000000
-#define ANDI 0x92000000
-#define ASRV 0x9ac02800
-#define B 0x14000000
-#define B_CC 0x54000000
-#define BL 0x94000000
-#define BLR 0xd63f0000
-#define BR 0xd61f0000
-#define BRK 0xd4200000
-#define CBZ 0xb4000000
-#define CLZ 0xdac01000
-#define CSEL 0x9a800000
-#define CSINC 0x9a800400
-#define EOR 0xca000000
-#define EORI 0xd2000000
-#define EXTR 0x93c00000
-#define FABS 0x1e60c000
-#define FADD 0x1e602800
-#define FCMP 0x1e602000
-#define FCVT 0x1e224000
-#define FCVTZS 0x9e780000
-#define FDIV 0x1e601800
-#define FMOV 0x1e604000
-#define FMUL 0x1e600800
-#define FNEG 0x1e614000
-#define FSUB 0x1e603800
-#define LDRI 0xf9400000
-#define LDRI_F64 0xfd400000
-#define LDRI_POST 0xf8400400
-#define LDP 0xa9400000
-#define LDP_F64 0x6d400000
-#define LDP_POST 0xa8c00000
-#define LDR_PRE 0xf8400c00
-#define LSLV 0x9ac02000
-#define LSRV 0x9ac02400
-#define MADD 0x9b000000
-#define MOVK 0xf2800000
-#define MOVN 0x92800000
-#define MOVZ 0xd2800000
-#define NOP 0xd503201f
-#define ORN 0xaa200000
-#define ORR 0xaa000000
-#define ORRI 0xb2000000
-#define RBIT 0xdac00000
-#define RET 0xd65f0000
-#define RORV 0x9ac02c00
-#define SBC 0xda000000
-#define SBFM 0x93000000
-#define SCVTF 0x9e620000
-#define SDIV 0x9ac00c00
-#define SMADDL 0x9b200000
-#define SMULH 0x9b403c00
-#define STP 0xa9000000
-#define STP_F64 0x6d000000
-#define STP_PRE 0xa9800000
-#define STRB 0x38206800
-#define STRBI 0x39000000
-#define STRI 0xf9000000
-#define STRI_F64 0xfd000000
-#define STR_FI 0x3d000000
-#define STR_FR 0x3c206800
-#define STUR_FI 0x3c000000
-#define STURBI 0x38000000
-#define SUB 0xcb000000
-#define SUBI 0xd1000000
-#define SUBS 0xeb000000
-#define UBFM 0xd3000000
-#define UDIV 0x9ac00800
-#define UMULH 0x9bc03c00
+#define ADC 0x9a000000
+#define ADD 0x8b000000
+#define ADDE 0x8b200000
+#define ADDI 0x91000000
+#define AND 0x8a000000
+#define ANDI 0x92000000
+#define AND_v 0x0e201c00
+#define ASRV 0x9ac02800
+#define B 0x14000000
+#define B_CC 0x54000000
+#define BL 0x94000000
+#define BLR 0xd63f0000
+#define BR 0xd61f0000
+#define BRK 0xd4200000
+#define CAS 0xc8a07c00
+#define CASB 0x08a07c00
+#define CASH 0x48a07c00
+#define CBZ 0xb4000000
+#define CCMPI 0xfa400800
+#define CLZ 0xdac01000
+#define CSEL 0x9a800000
+#define CSINC 0x9a800400
+#define DUP_e 0x0e000400
+#define DUP_g 0x0e000c00
+#define EOR 0xca000000
+#define EOR_v 0x2e201c00
+#define EORI 0xd2000000
+#define EXTR 0x93c00000
+#define FABS 0x1e60c000
+#define FADD 0x1e602800
+#define FCMP 0x1e602000
+#define FCSEL 0x1e600c00
+#define FCVT 0x1e224000
+#define FCVTL 0x0e217800
+#define FCVTZS 0x9e780000
+#define FDIV 0x1e601800
+#define FMOV 0x1e604000
+#define FMOV_R 0x9e660000
+#define FMOV_I 0x1e601000
+#define FMUL 0x1e600800
+#define FNEG 0x1e614000
+#define FSUB 0x1e603800
+#define INS 0x4e001c00
+#define INS_e 0x6e000400
+#define LD1 0x0c407000
+#define LD1_s 0x0d400000
+#define LD1R 0x0d40c000
+#define LDRI 0xf9400000
+#define LDRI_F64 0xfd400000
+#define LDRI_POST 0xf8400400
+#define LDP 0xa9400000
+#define LDP_F64 0x6d400000
+#define LDP_POST 0xa8c00000
+#define LDR_PRE 0xf8400c00
+#define LDXR 0xc85f7c00
+#define LDXRB 0x085f7c00
+#define LDXRH 0x485f7c00
+#define LSLV 0x9ac02000
+#define LSRV 0x9ac02400
+#define MADD 0x9b000000
+#define MOVI 0x0f000400
+#define MOVK 0xf2800000
+#define MOVN 0x92800000
+#define MOVZ 0xd2800000
+#define NOP 0xd503201f
+#define ORN 0xaa200000
+#define ORR 0xaa000000
+#define ORR_v 0x0ea01c00
+#define ORRI 0xb2000000
+#define RBIT 0xdac00000
+#define RET 0xd65f0000
+#define REV 0xdac00c00
+#define REV16 0xdac00400
+#define RORV 0x9ac02c00
+#define SBC 0xda000000
+#define SBFM 0x93400000
+#define SCVTF 0x9e620000
+#define SDIV 0x9ac00c00
+#define SMADDL 0x9b200000
+#define SMOV 0x0e002c00
+#define SMULH 0x9b403c00
+#define SSHLL 0x0f00a400
+#define ST1 0x0c007000
+#define ST1_s 0x0d000000
+#define STP 0xa9000000
+#define STP_F64 0x6d000000
+#define STP_PRE 0xa9800000
+#define STRB 0x38206800
+#define STRBI 0x39000000
+#define STRI 0xf9000000
+#define STRI_F64 0xfd000000
+#define STR_FI 0x3d000000
+#define STR_FR 0x3c206800
+#define STUR_FI 0x3c000000
+#define STURBI 0x38000000
+#define STXR 0xc8007c00
+#define STXRB 0x8007c00
+#define STXRH 0x48007c00
+#define SUB 0xcb000000
+#define SUBI 0xd1000000
+#define SUBS 0xeb000000
+#define TBZ 0x36000000
+#define UBFM 0xd3400000
+#define UCVTF 0x9e630000
+#define UDIV 0x9ac00800
+#define UMOV 0x0e003c00
+#define UMULH 0x9bc03c00
+#define USHLL 0x2f00a400
+#define USHR 0x2f000400
+#define USRA 0x2f001400
+#define XTN 0x0e212800
+/* Encodings derived from the base opcodes: CSET is CSINC with TMP_ZERO as
+   both source registers, LDR and LDRB are STRI and STRBI with bit 22 set,
+   LDRH is LDRB with bit 30 set, and MOV is ORR with TMP_ZERO as RN. */
+#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO))
+#define LDR (STRI | (1 << 22))
+#define LDRB (STRBI | (1 << 22))
+#define LDRH (LDRB | (1 << 30))
+#define MOV (ORR | RN(TMP_ZERO))
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
@@ -175,7 +219,7 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins
target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
- diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;
+ diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr - 4) - executable_offset;
if (jump->flags & IS_COND) {
diff += SSIZE_OF(ins);
@@ -385,8 +429,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+ case SLJIT_HAS_SIMD:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
@@ -394,9 +439,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
default:
@@ -404,6 +453,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
}
}
+/* Returns 2 for the comparison types SLJIT_UNORDERED_OR_EQUAL and
+   SLJIT_ORDERED_NOT_EQUAL, and 0 for every other type. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
+{
+	if (type == SLJIT_UNORDERED_OR_EQUAL || type == SLJIT_ORDERED_NOT_EQUAL)
+		return 2;
+
+	return 0;
+}
+
/* --------------------------------------------------------------------- */
/* Core code generator functions. */
/* --------------------------------------------------------------------- */
@@ -636,6 +696,11 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_MUL:
case SLJIT_CLZ:
case SLJIT_CTZ:
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
case SLJIT_ADDC:
case SLJIT_SUBC:
/* No form with immediate operand (except imm 0, which
@@ -644,10 +709,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_MOV:
SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
return load_immediate(compiler, dst, imm);
- case SLJIT_NOT:
- SLJIT_ASSERT(flags & ARG2_IMM);
- FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
- goto set_flags;
case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM)
@@ -694,8 +755,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
break;
CHECK_FLAGS(3 << 29);
return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
- case SLJIT_OR:
case SLJIT_XOR:
+ if (imm == -1) {
+ FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg)));
+ goto set_flags;
+ }
+ /* fallthrough */
+ case SLJIT_OR:
inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
if (!inst_bits)
break;
@@ -718,6 +784,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
}
+ inv_bits |= inv_bits >> 9;
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
goto set_flags;
case SLJIT_LSHR:
@@ -727,6 +794,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
if (flags & ARG1_IMM)
break;
+ inv_bits |= inv_bits >> 9;
if (op >= SLJIT_ASHR)
inv_bits |= 1 << 30;
@@ -780,22 +848,22 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (dst == arg2)
return SLJIT_SUCCESS;
- return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+ return push_inst(compiler, MOV | RD(dst) | RM(arg2));
case SLJIT_MOV_U8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10));
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_S8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- if (!(flags & INT_OP))
- inv_bits |= 1 << 22;
+ inv_bits |= inv_bits >> 9;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_U16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10));
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV_S16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- if (!(flags & INT_OP))
- inv_bits |= 1 << 22;
+ inv_bits |= inv_bits >> 9;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
@@ -804,14 +872,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
/* fallthrough */
case SLJIT_MOV_U32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+ return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2));
case SLJIT_MOV_S32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
- case SLJIT_NOT:
- SLJIT_ASSERT(arg1 == TMP_REG1);
- FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
- break; /* Set flags. */
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
@@ -819,6 +883,25 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(arg1 == TMP_REG1);
FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
+ case SLJIT_REV:
+ SLJIT_ASSERT(arg1 == TMP_REG1);
+ inv_bits |= inv_bits >> 21;
+ return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2));
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
+ FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2)));
+ if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16))
+ return SLJIT_SUCCESS;
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
+ FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2)));
+ if (op == SLJIT_REV_U32 || dst == TMP_REG1)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29);
@@ -980,7 +1063,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
- saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
+ saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
compiler->local_size = local_size;
@@ -1065,7 +1148,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
while (arg_types) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
- FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp)));
+ FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp)));
saved_arg_count++;
}
tmp++;
@@ -1153,7 +1236,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
- saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
+ saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
return SLJIT_SUCCESS;
@@ -1272,7 +1355,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c
src = TMP_REG1;
srcw = 0;
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
src = TMP_REG1;
srcw = 0;
}
@@ -1302,12 +1385,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return push_inst(compiler, NOP);
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0)));
FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
- FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+ FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0)));
FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
@@ -1349,33 +1432,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
case SLJIT_MOV_U8:
mem_flags = BYTE_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
mem_flags = BYTE_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
mem_flags = HALF_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
mem_flags = HALF_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
case SLJIT_MOV_U32:
mem_flags = INT_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u32)srcw;
break;
case SLJIT_MOV_S32:
case SLJIT_MOV32:
mem_flags = INT_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s32)srcw;
break;
default:
@@ -1384,7 +1467,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
}
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
else if (!(src & SLJIT_MEM))
dst_r = src;
@@ -1397,11 +1480,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
}
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
- mem_flags = WORD_SIZE;
- if (op_flags & SLJIT_32) {
- flags |= INT_OP;
+ switch (op) {
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ mem_flags = HALF_SIZE;
+ break;
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
mem_flags = INT_SIZE;
+ break;
+ default:
+ mem_flags = WORD_SIZE;
+
+ if (op_flags & SLJIT_32) {
+ flags |= INT_OP;
+ mem_flags = INT_SIZE;
+ }
+ break;
}
if (src & SLJIT_MEM) {
@@ -1451,12 +1547,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
flags |= ARG1_IMM;
else
src1w = src1;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
flags |= ARG2_IMM;
else
src2w = src2;
@@ -1480,57 +1576,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_ins inv_bits, imm;
sljit_s32 is_left;
sljit_sw mask;
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
inv_bits = (op & SLJIT_32) ? W_OP : 0;
- mask = inv_bits ? 0x1f : 0x3f;
- if (src2 & SLJIT_IMM) {
- src2w &= mask;
+ if (src3 == SLJIT_IMM) {
+ mask = inv_bits ? 0x1f : 0x3f;
+ src3w &= mask;
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
- src1 = TMP_REG1;
- }
-
- if (src2 & SLJIT_IMM) {
if (is_left)
- src2w = (src2w ^ mask) + 1;
+ src3w = (src3w ^ mask) + 1;
+
+ return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg)
+ | RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10));
+ }
- return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(src_dst)
- | RN(is_left ? src_dst : src1) | RM(is_left ? src1 : src_dst) | ((sljit_ins)src2w << 10));
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
+ src3 = TMP_REG2;
+ } else if (dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3)));
+ src3 = TMP_REG2;
}
- FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(src2)));
+ FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3)));
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
/* Shift left/right by 1. */
@@ -1539,18 +1630,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
else
imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));
- FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(TMP_REG1) | RN(src1) | imm));
+ FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm));
/* Set imm to mask. */
imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
- FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src2) | imm));
+ FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm));
- src1 = TMP_REG1;
+ src2_reg = TMP_REG1;
} else
- FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src2)));
+ FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3)));
- FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src1) | RM(TMP_REG2)));
- return push_inst(compiler, (ORR ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(TMP_REG1));
+ FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2)));
+ return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1563,7 +1654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
switch (op) {
case SLJIT_FAST_RETURN:
if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src)));
else
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
@@ -1593,15 +1684,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 dst_r = TMP_LR;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2));
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return reg_map[reg];
+
+ if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128)
+ return -1;
+
return freg_map[reg];
}
@@ -1679,7 +1797,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
inv_bits |= W_OP;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw));
src = TMP_FREG1;
}
@@ -1690,34 +1808,59 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, /* ins: SCVTF/UCVTF opcode already adjusted by the _sw/_uw callers */
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
-
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- inv_bits |= W_OP;
if (src & SLJIT_MEM) {
- emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
+ FAIL_IF(emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1)); /* propagate emit failure; W_OP is toggled by the callers for the 32-bit integer variants */
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- srcw = (sljit_s32)srcw;
-
+ } else if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
+ FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src)));
if (dst & SLJIT_MEM)
- return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
+ return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw); /* bit 22 is toggled by the callers when SLJIT_32 is set */
return SLJIT_SUCCESS;
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
+ inv_bits |= W_OP;
+
+ if (src == SLJIT_IMM)
+ srcw = (sljit_s32)srcw;
+ }
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw);
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
+ inv_bits |= W_OP;
+
+ if (src == SLJIT_IMM)
+ srcw = (sljit_u32)srcw;
+ }
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw);
+}
+
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -1726,16 +1869,22 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
- return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
+ FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)));
+
+ if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+ return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1754,7 +1903,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw);
+ FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw));
src = dst_r;
}
@@ -1799,11 +1948,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
@@ -1820,6 +1969,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
break;
+ case SLJIT_COPYSIGN_F64:
+ FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1)));
+ FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1)));
+ return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r));
}
if (!(dst & SLJIT_MEM))
@@ -1827,21 +1981,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
}
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+ sljit_u32 exp;
+ union {
+ sljit_u32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm == 0)
+ return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16));
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+ if ((u.imm << (32 - 19)) == 0) {
+ exp = (u.imm >> (23 + 2)) & 0x3f;
+
+ if (exp == 0x20 || exp == 0x1f)
+ return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm));
+ return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
{
+ sljit_uw exp;
+ union {
+ sljit_uw imm;
+ sljit_f64 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
- if (FAST_IS_REG(dst))
- return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
+ u.value = value;
- /* Memory. */
- return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
+ if (u.imm == 0)
+ return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16);
+
+ if ((u.imm << (64 - 48)) == 0) {
+ exp = (u.imm >> (52 + 2)) & 0x1ff;
+
+ if (exp == 0x100 || exp == 0xff)
+ return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm));
+ return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
+ inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16);
+ else
+ inst = FMOV_R | VN(freg) | RD(reg);
+
+ if (op & SLJIT_32)
+ inst ^= W_OP | (1 << 22);
+
+ return push_inst(compiler, inst);
}
/* --------------------------------------------------------------------- */
@@ -1852,15 +2064,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+ case SLJIT_UNORDERED_OR_EQUAL:
return 0x1;
case SLJIT_NOT_EQUAL:
+ case SLJIT_ATOMIC_NOT_STORED:
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+ case SLJIT_ORDERED_NOT_EQUAL:
return 0x0;
case SLJIT_CARRY:
@@ -2011,7 +2225,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi
PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@@ -2035,7 +2249,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
@@ -2071,7 +2285,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
src = TMP_REG1;
}
@@ -2131,27 +2345,53 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (src1 == SLJIT_IMM) { /* immediate source: materialise it in TMP_REG1 */
if (type & SLJIT_32)
- srcw = (sljit_s32)srcw;
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- src = TMP_REG1;
- srcw = 0;
+ src1w = (sljit_s32)src1w;
+ FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+ src1 = TMP_REG1;
+ } else if (src1 & SLJIT_MEM) { /* memory source: load it into TMP_REG1 */
+ FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2)); /* SLJIT_32 operands are 4 bytes wide: do not read a full word */
+ src1 = TMP_REG1;
}
cc = get_cc(compiler, type & ~SLJIT_32);
+ return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0;
+ sljit_ins cc;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w));
+ src1 = TMP_FREG1;
+ }
- return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
+ cc = get_cc(compiler, type & ~SLJIT_32);
+ return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
@@ -2308,6 +2548,661 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
}
+static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
+{
+ sljit_ins ins;
+ sljit_s32 mem = *mem_ptr;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ *mem_ptr = TMP_REG1;
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10));
+ }
+
+ if (!(mem & REG_MASK)) {
+ *mem_ptr = TMP_REG1;
+ return load_immediate(compiler, TMP_REG1, memw);
+ }
+
+ mem &= REG_MASK;
+
+ if (memw == 0) {
+ *mem_ptr = mem;
+ return SLJIT_SUCCESS;
+ }
+
+ *mem_ptr = TMP_REG1;
+
+ if (memw < -0xffffff || memw > 0xffffff) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
+ }
+
+ ins = ADDI;
+
+ if (memw < 0) {
+ memw = -memw;
+ ins = SUBI;
+ }
+
+ if (memw > 0xfff) {
+ FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10)));
+
+ memw &= 0xfff;
+ if (memw == 0)
+ return SLJIT_SUCCESS;
+
+ mem = TMP_REG1;
+ }
+
+ return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (!(srcdst & SLJIT_MEM)) {
+ if (type & SLJIT_SIMD_STORE)
+ ins = VD(srcdst) | VN(freg) | VM(freg);
+ else
+ ins = VD(freg) | VN(srcdst) | VM(srcdst);
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ return push_inst(compiler, ORR_v | ins);
+ }
+
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ if (elem_size > 3)
+ elem_size = 3;
+
+ ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1;
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg));
+}
+
+static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
+{
+ sljit_ins result;
+
+ if (elem_size > 2 && (sljit_u32)value == (value >> 32)) {
+ elem_size = 2;
+ value = (sljit_u32)value;
+ }
+
+ if (elem_size == 2 && (sljit_u16)value == (value >> 16)) {
+ elem_size = 1;
+ value = (sljit_u16)value;
+ }
+
+ if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
+ elem_size = 0;
+ value = (sljit_u8)value;
+ }
+
+ switch (elem_size) {
+ case 0:
+ SLJIT_ASSERT(value <= 0xff);
+ result = 0xe000;
+ break;
+ case 1:
+ SLJIT_ASSERT(value <= 0xffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x8000;
+ break;
+ }
+
+ if ((value & 0xff) == 0) {
+ value >>= 8;
+ result |= 0xa000;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffff;
+ result = (1 << 29);
+ }
+ break;
+ case 2:
+ SLJIT_ASSERT(value <= 0xffffffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x0000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff00) == 0) {
+ value >>= 8;
+ result |= 0x2000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff0000) == 0) {
+ value >>= 16;
+ result |= 0x4000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff000000) == 0) {
+ value >>= 24;
+ result |= 0x6000;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
+ value >>= 8;
+ result |= 0xc000;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
+ value >>= 16;
+ result |= 0xd000;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffffffff;
+ result = (1 << 29);
+ }
+ break;
+ default:
+ return ~(sljit_ins)0;
+ }
+
+ return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imm;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ ins = (sljit_ins)elem_size << 10;
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, LD1R | ins | RN(src) | VT(freg));
+ }
+
+ ins = (sljit_ins)1 << (16 + elem_size);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (src == SLJIT_IMM)
+ return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg));
+
+ return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
+ }
+
+ if (src == SLJIT_IMM) {
+ if (elem_size < 3)
+ srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ imm = simd_get_imm(elem_size, (sljit_uw)srcw);
+
+ if (imm != ~(sljit_ins)0) {
+ imm |= ins & ((sljit_ins)1 << 30);
+
+ return push_inst(compiler, MOVI | imm | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ }
+
+ return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30);
+
+ if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg)));
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ if (elem_size == 3)
+ ins = 0x8400;
+ else if (elem_size == 0)
+ ins = 0;
+ else
+ ins = (sljit_ins)0x2000 << elem_size;
+
+ lane_index = lane_index << elem_size;
+ ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10));
+
+ return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (type & SLJIT_SIMD_STORE)
+ ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg);
+ else
+ ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst);
+
+ return push_inst(compiler, ins);
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size < 3)
+ srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
+ srcdst = TMP_REG1;
+ }
+
+ if (type & SLJIT_SIMD_STORE) {
+ ins = RD(srcdst) | VN(freg);
+
+ if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) {
+ ins |= SMOV;
+
+ if (!(type & SLJIT_32))
+ ins |= (sljit_ins)1 << 30;
+ } else
+ ins |= UMOV;
+ } else
+ ins = INS | VD(freg) | RN(srcdst);
+
+ if (elem_size == 3)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg)));
+ else
+ FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg)));
+ src = freg;
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ SLJIT_ASSERT(reg_size == 4);
+ return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src));
+ }
+
+ do {
+ FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL)
+ | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src)));
+ src = freg;
+ } while (++elem_size < elem2_size);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ imms = 0x643219;
+ ins = USHR | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x643219 : 0x6231;
+ ins = USHR | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x6231 : 0x61;
+ ins = USHR | (0x21 << 16);
+ break;
+ default:
+ imms = 0x61;
+ ins = USHR | (0x41 << 16);
+ break;
+ }
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg)));
+
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ if (imms >= 0x100) {
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0;
+
+ do {
+ FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ imms >>= 8;
+ } while (imms >= 0x100);
+ }
+
+ FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ ins = (0x1 << 16);
+
+ if (reg_size == 4 && elem_size == 0) {
+ FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ ins = (0x2 << 16);
+ }
+
+ FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1)));
+
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) {
+ case SLJIT_SIMD_OP2_AND:
+ ins = AND_v;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ ins = ORR_v;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ ins = EOR_v;
+ break;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+#ifdef __ARM_FEATURE_ATOMICS
+ switch (GET_OPCODE(op)) { /* plain LDR* loads; the matching store in this build uses CAS* */
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = LDR ^ (1 << 30); /* NOTE(review): bit 30 presumably selects the 32-bit form - confirm */
+ break;
+ case SLJIT_MOV_U16:
+ ins = LDRH;
+ break;
+ case SLJIT_MOV_U8:
+ ins = LDRB;
+ break;
+ default:
+ ins = LDR;
+ break;
+ }
+#else /* !__ARM_FEATURE_ATOMICS */
+ switch (GET_OPCODE(op)) { /* LDXR* loads; the matching store in this build uses STXR* */
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = LDXR ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U8:
+ ins = LDXRB;
+ break;
+ case SLJIT_MOV_U16:
+ ins = LDXRH;
+ break;
+ default:
+ ins = LDXR;
+ break;
+ }
+#endif /* __ARM_FEATURE_ATOMICS */
+ return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ sljit_ins ins;
+ sljit_s32 tmp = temp_reg;
+ sljit_ins cmp = 0;
+ sljit_ins inv_bits = W_OP;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+#ifdef __ARM_FEATURE_ATOMICS
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ cmp = (SUBS ^ W_OP) | RD(TMP_ZERO);
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = CAS ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U16:
+ ins = CASH;
+ break;
+ case SLJIT_MOV_U8:
+ ins = CASB;
+ break;
+ default:
+ ins = CAS;
+ inv_bits = 0;
+ if (cmp)
+ cmp ^= W_OP;
+ break;
+ }
+
+ if (cmp) {
+ FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1)));
+ tmp = TMP_REG1;
+ }
+ FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg)));
+ if (!cmp)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg)));
+ FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp)));
+ return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO));
+#else /* !__ARM_FEATURE_ATOMICS */
+ SLJIT_UNUSED_ARG(tmp);
+ SLJIT_UNUSED_ARG(inv_bits);
+
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ cmp = (SUBI ^ W_OP) | (1 << 29);
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = STXR ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U8:
+ ins = STXRB;
+ break;
+ case SLJIT_MOV_U16:
+ ins = STXRH;
+ break;
+ default:
+ ins = STXR;
+ break;
+ }
+
+ FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg)));
+ return cmp ? push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS;
+#endif /* __ARM_FEATURE_ATOMICS */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
sljit_s32 dst_reg;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
index 7d6bac077e..c27c50ddb3 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
@@ -49,8 +49,20 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
- 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
+static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
+ 0,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6
+};
+
+static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1
};
#define COPY_BITS(src, from, to, bits) \
@@ -75,13 +87,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
/* Thumb32 encodings. */
-#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
-#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)
#define RM4(rm) ((sljit_ins)reg_map[rm])
+#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
-#define DD4(dd) ((sljit_ins)freg_map[dd] << 12)
-#define DN4(dn) ((sljit_ins)freg_map[dn] << 16)
-#define DM4(dm) ((sljit_ins)freg_map[dm])
+#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)
+
+#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
+#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
+#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
+
#define IMM5(imm) \
(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
#define IMM12(imm) \
@@ -128,9 +142,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
-#define LDR_SP 0x9800
#define LDR 0xf8d00000
+#define LDR_SP 0x9800
#define LDRD 0xe9500000
+#define LDREX 0xe8500f00
+#define LDREXB 0xe8d00f4f
+#define LDREXH 0xe8d00f5f
#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
@@ -160,6 +177,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define POP_W 0xe8bd0000
#define PUSH 0xb400
#define PUSH_W 0xe92d0000
+#define REV 0xba00
+#define REV_W 0xfa90f080
+#define REV16 0xba40
+#define REV16_W 0xfa90f090
#define RBIT 0xfa90f0a0
#define RORS 0x41c0
#define ROR_W 0xfa60f000
@@ -171,8 +192,11 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
-#define STRD 0xe9400000
#define STR_SP 0x9000
+#define STRD 0xe9400000
+#define STREX 0xe8400000
+#define STREXB 0xe8c00f40
+#define STREXH 0xe8c00f50
#define SUBS 0x1a00
#define SUBSI3 0x1e00
#define SUBSI8 0x3800
@@ -195,23 +219,57 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define UXTH_W 0xfa1ff080
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
+#define VAND 0xef000110
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
+#define VCVT_F32_U32 0xeeb80a40
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
+#define VDUP 0xee800b10
+#define VDUP_s 0xffb00c00
+#define VEOR 0xff000110
+#define VLD1 0xf9200000
+#define VLD1_r 0xf9a00c00
+#define VLD1_s 0xf9a00000
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
+#define VMOV_i 0xef800010
+#define VMOV_s 0xee000b10
+#define VMOVN 0xffb20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
+#define VORR 0xef200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
+#define VSHLL 0xef800a10
+#define VSHR 0xef800010
+#define VSRA 0xef800110
+#define VST1 0xf9000000
+#define VST1_s 0xf9800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+/* Argument check helper: returns non-zero when fr is a usable float register
+   id (scratch, saved or temporary range), zero otherwise.
+   When is_32 is non-zero, ids at or above SLJIT_F64_SECOND(SLJIT_FR0) are
+   shifted down by SLJIT_F64_SECOND(0) before the range tests. */
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+	/* NOTE(review): scratches == -1 presumably means no context was set yet - confirm. */
+	if (compiler->scratches == -1)
+		return 0;
+
+	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
+		fr -= SLJIT_F64_SECOND(0);
+
+	/* Scratch float registers. */
+	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+		return 1;
+
+	/* Saved float registers (counted downwards from SLJIT_FS0). */
+	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
+		return 1;
+
+	/* Temporary float registers. */
+	return fr >= SLJIT_TMP_FREGISTER_BASE
+		&& fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS);
+}
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
sljit_u16 *ptr;
@@ -488,18 +546,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+ case SLJIT_HAS_F64_AS_F32_PAIR:
+ case SLJIT_HAS_SIMD:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
+ case SLJIT_SIMD_REGS_ARE_PAIRS:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
default:
@@ -615,18 +680,17 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
switch (flags & 0xffff) {
case SLJIT_CLZ:
case SLJIT_CTZ:
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
case SLJIT_MUL:
/* No form with immediate operand. */
break;
case SLJIT_MOV:
SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
return load_immediate(compiler, dst, imm);
- case SLJIT_NOT:
- if (!(flags & SET_FLAGS))
- return load_immediate(compiler, dst, ~imm);
- /* Since the flags should be set, we just fallback to the register mode.
- Although some clever things could be done here, "NOT IMM" does not worth the efforts. */
- break;
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
imm2 = NEGATE(imm);
@@ -657,9 +721,14 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
break;
case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
- imm = get_imm(imm);
- if (imm != INVALID_IMM)
- return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ imm2 = get_imm(imm);
+ if (imm2 != INVALID_IMM)
+ return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
+ if (flags & ARG2_IMM) {
+ imm = get_imm(~imm);
+ if (imm != INVALID_IMM)
+ return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ }
break;
case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
@@ -712,9 +781,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM)
break;
- imm = get_imm(imm);
+ imm2 = get_imm(imm);
+ if (imm2 != INVALID_IMM)
+ return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
+ imm = get_imm(~imm);
if (imm != INVALID_IMM)
- return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break;
case SLJIT_AND:
imm2 = get_imm(imm);
@@ -733,6 +805,11 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break;
case SLJIT_XOR:
+ if (imm == (sljit_uw)-1) {
+ if (IS_2_LO_REGS(dst, reg))
+ return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
+ return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
+ }
imm = get_imm(imm);
if (imm != INVALID_IMM)
return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
@@ -788,8 +865,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
imm = arg2;
arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
- }
- else {
+ } else {
imm = arg1;
arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
@@ -829,11 +905,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
- case SLJIT_NOT:
- SLJIT_ASSERT(arg1 == TMP_REG2);
- if (IS_2_LO_REGS(dst, arg2))
- return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
- return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG2);
return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
@@ -841,6 +912,29 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(arg1 == TMP_REG2);
FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
+ case SLJIT_REV:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ SLJIT_ASSERT(arg1 == TMP_REG2);
+ if (IS_2_LO_REGS(dst, arg2))
+ return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
+ return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2);
+
+ flags &= 0xffff;
+ if (IS_2_LO_REGS(dst, arg2))
+ FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
+ else
+ FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));
+
+ if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16))
+ return SLJIT_SUCCESS;
+
+ if (reg_map[dst] <= 7)
+ return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
+ return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (IS_3_LO_REGS(dst, arg1, arg2))
@@ -1176,12 +1270,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
}
if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
- FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
+ FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
} else {
if (fsaveds > 0)
- FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
+ FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
- FAIL_IF(push_inst32(compiler, VPUSH | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
+ FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
}
}
@@ -1258,17 +1352,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
switch (arg_types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (offset != old_offset)
- *remap_ptr++ = VMOV_F32 | SLJIT_32 | DD4(offset) | DM4(old_offset);
+ *remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
old_offset++;
offset++;
break;
case SLJIT_ARG_TYPE_F32:
if (f32_offset != 0) {
- *remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset);
+ *remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
f32_offset = 0;
} else {
if (offset != old_offset)
- *remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset);
+ *remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
f32_offset = old_offset;
old_offset++;
}
@@ -1356,6 +1450,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
+ /* Doubles are saved, so alignment is unaffected. */
if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
size += SSIZE_OF(sw);
@@ -1401,12 +1496,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
- FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
+ FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
} else {
if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
- FAIL_IF(push_inst32(compiler, VPOP | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
+ FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
if (fsaveds > 0)
- FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
+ FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
}
local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
@@ -1705,22 +1800,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
case SLJIT_MOV_U8:
flags = BYTE_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
flags = BYTE_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
flags = HALF_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
flags = HALF_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
default:
@@ -1729,7 +1824,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
}
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
else if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
@@ -1745,10 +1840,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
}
+ SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
+ flags |= HALF_SIZE;
+
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
@@ -1778,7 +1877,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (dst == TMP_REG1)
flags |= UNUSED_RETURN;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
flags |= ARG1_IMM;
else if (src1 & SLJIT_MEM) {
emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
@@ -1787,7 +1886,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
else
src1w = src1;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
flags |= ARG2_IMM;
else if (src2 & SLJIT_MEM) {
src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
@@ -1816,68 +1915,60 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
}
+/* Emits code that shifts src1_reg by the amount given in src3 / src3w,
+   ORs in bits taken from src2_reg shifted in the opposite direction, and
+   writes the result to dst_reg. The direction is selected by op
+   (SLJIT_SHL / SLJIT_MSHL shift left, anything else shifts right).
+   Returns SLJIT_SUCCESS or the error reported by the emitter helpers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_s32 is_left;
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
op = GET_OPCODE(op);
is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);
- if (src_dst == src1) {
+ /* Both inputs are the same register: emit a rotate instead. */
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src2 & SLJIT_IMM) {
- src2w &= 0x1f;
+ if (src3 == SLJIT_IMM) {
+ src3w &= 0x1f;
- if (src2w == 0)
+ /* NOTE(review): nothing is emitted for a zero count, even when
+    dst_reg differs from src1_reg - confirm callers do not rely on
+    dst_reg being written in that case. */
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
- src1 = TMP_REG1;
- }
-
- if (src2 & SLJIT_IMM) {
- if (reg_map[src_dst] <= 7)
- FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(src_dst) | RN3(src_dst) | ((sljit_ins)src2w << 6)));
+ if (IS_2_LO_REGS(dst_reg, src1_reg))
+ FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
else
- FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(src_dst) | RM4(src_dst) | IMM5(src2w)));
+ FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));
- src2w = (src2w ^ 0x1f) + 1;
- return push_inst32(compiler, ORR_W | RD4(src_dst) | RN4(src_dst) | RM4(src1) | (is_left ? 0x10 : 0x0) | IMM5(src2w));
+ /* For a count in 1..31 this is 32 - count: the shift applied to src2_reg. */
+ src3w = (src3w ^ 0x1f) + 1;
+ return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
}
- if (op == SLJIT_MSHL || op == SLJIT_MLSHR) {
- FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src2) | 0x1f));
- src2 = TMP_REG2;
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
+ src3 = TMP_REG2;
}
- if (IS_2_LO_REGS(src_dst, src2))
- FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(src_dst) | RN3(src2)));
+ /* The count is copied (masked to 5 bits) into TMP_REG2 for the masked
+    opcodes, and also when dst_reg is the count register, because dst_reg
+    is overwritten below before the count is read again. */
+ if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
+ FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
+ src3 = TMP_REG2;
+ }
+
+ if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
+ FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
else
- FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(src_dst) | RN4(src_dst) | RM4(src2)));
+ FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));
- FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src1) | (1 << 6)));
- FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src2) | 0x1f));
+ /* src2_reg is shifted the other way in two steps: first by one
+    ((1 << 6) equals IMM5(1)), then by (count ^ 0x1f) held in TMP_REG2. */
+ FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
+ FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
- return push_inst32(compiler, ORR_W | RD4(src_dst) | RN4(src_dst) | RM4(TMP_REG1));
+ return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1909,16 +2000,60 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+/* Emits code for an operation that only has a destination operand.
+   Handles SLJIT_FAST_ENTER and SLJIT_GET_RETURN_ADDRESS; the value ends up
+   in dst, which may be a register or a memory operand (dst / dstw).
+   Returns SLJIT_SUCCESS or the error reported by the emitter helpers. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 size, dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ /* TMP_REG2 must map to r14 (NOTE(review): presumably the link register). */
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
+ /* Register destination: copy TMP_REG2. A memory destination is stored after the switch. */
+ if (FAST_IS_REG(dst))
+ return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ /* size is the byte distance between the end of the local area and the stack slot loaded below. */
+ size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
+
+ if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
+ /* The size of pc is not added above. */
+ if ((size & SSIZE_OF(sw)) == 0)
+ size += SSIZE_OF(sw);
+
+ size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
+ }
+
+ SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);
+
+ /* Load into dst when it is a register, otherwise into TMP_REG2 for the store below. */
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
+ break;
+ }
+
+ /* Memory destination: the value is in TMP_REG2 for both opcodes. */
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+/* Returns the machine register index that belongs to reg for the given
+   register type, or -1 when the type is not supported on this target.
+   - SLJIT_GP_REGISTER: index taken from reg_map
+   - SLJIT_FLOAT_REGISTER / SLJIT_SIMD_REG_64: index taken from freg_map
+   - SLJIT_SIMD_REG_128: freg_map index with the lowest bit cleared */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return (freg_map[reg] << 1);
+	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+	if (type == SLJIT_GP_REGISTER)
+		return reg_map[reg];
+
+	if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
+		return freg_map[reg];
+
+	/* 128 bit SIMD registers are register pairs here (SLJIT_SIMD_REGS_ARE_PAIRS
+	   is reported as available), so the even index of the pair is returned.
+	   The test must be '==': with '!=' the supported 128 bit type got -1 and
+	   every unknown type got a register index. */
+	if (type == SLJIT_SIMD_REG_128)
+		return freg_map[reg] & ~0x1;
+
+	return -1;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1954,35 +2089,35 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags,
if ((arg & REG_MASK) && (argw & 0x3) == 0) {
if (!(argw & ~0x3fc))
- return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)argw >> 2));
+ return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
if (!(-argw & ~0x3fc))
- return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)-argw >> 2));
+ return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
}
if (arg & REG_MASK) {
if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
FAIL_IF(compiler->error);
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
+ return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
if (imm != INVALID_IMM) {
FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
+ return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
}
imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
if (imm != INVALID_IMM) {
argw = -argw;
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
- return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
+ return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
}
}
FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
if (arg & REG_MASK)
FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
+ return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1996,41 +2131,53 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
src = TMP_FREG1;
}
- FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src)));
+ FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));
if (FAST_IS_REG(dst))
- return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
+ return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));
/* Store the integer value from a VFP register. */
return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+/* Shared helper for the integer to float conversions.
+   ins is the complete conversion opcode (callers already merged the
+   precision bit into it); src is a register, memory operand or immediate
+   (srcw); the result goes to dst / dstw.
+   The integer is first placed in TMP_FREG1, then converted into dst_r.
+   Returns SLJIT_SUCCESS or the error reported by the emitter helpers. */
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- op ^= SLJIT_32;
-
if (FAST_IS_REG(src))
- FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
+ FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
else if (src & SLJIT_MEM) {
/* Load the integer value into a VFP register. */
FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
}
else {
+ /* Immediate source: materialize it in TMP_REG1, then move it to TMP_FREG1. */
FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
- FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
+ FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
}
- FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1)));
+ FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));
+ /* For a memory destination dst_r is TMP_FREG1; the SLJIT_32 bit of ins is passed on as the store flag. */
if (dst & SLJIT_MEM)
- return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
+ return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
return SLJIT_SUCCESS;
}
+/* Signed integer to float conversion: builds the VCVT_F32_S32 opcode and
+   forwards to sljit_emit_fop1_conv_f64_from_w. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	/* The SLJIT_32 bit of op is inverted before it is merged into the opcode. */
+	sljit_ins ins = VCVT_F32_S32 | (~op & SLJIT_32);
+
+	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
+}
+
+/* Unsigned integer to float conversion: builds the VCVT_F32_U32 opcode and
+   forwards to sljit_emit_fop1_conv_f64_from_w. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	/* The SLJIT_32 bit of op is inverted before it is merged into the opcode. */
+	sljit_ins ins = VCVT_F32_U32 | (~op & SLJIT_32);
+
+	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
+}
+
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -2038,17 +2185,23 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
op ^= SLJIT_32;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
- FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | DM4(src2)));
- return push_inst32(compiler, VMRS);
+ FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
+ FAIL_IF(push_inst32(compiler, VMRS));
+
+ if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
+ return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2068,7 +2221,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
op ^= SLJIT_32;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
src = dst_r;
}
@@ -2076,19 +2229,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
case SLJIT_MOV_F64:
if (src != dst_r) {
if (dst_r != TMP_FREG1)
- FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
else
dst_r = src;
}
break;
case SLJIT_NEG_F64:
- FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
+ FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
break;
case SLJIT_ABS_F64:
- FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
+ FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
break;
case SLJIT_CONV_F64_FROM_F32:
- FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
op ^= SLJIT_32;
break;
}
@@ -2115,27 +2268,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
switch (GET_OPCODE(op)) {
case SLJIT_ADD_F64:
- FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+ FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
break;
case SLJIT_SUB_F64:
- FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+ FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
break;
case SLJIT_MUL_F64:
- FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+ FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
break;
case SLJIT_DIV_F64:
- FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+ FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
break;
+ case SLJIT_COPYSIGN_F64:
+ FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
+ FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
+ FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
+ FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
+ return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
}
if (!(dst & SLJIT_MEM))
@@ -2143,23 +2302,99 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+/* Emits code that loads the 32 bit float constant value into freg.
+   When built with __ARM_NEON and the bit pattern qualifies, a single
+   immediate form instruction is emitted; otherwise the raw bits are
+   loaded into TMP_REG1 and moved to freg with VMOV.
+   Returns SLJIT_SUCCESS or the error reported by the emitter helpers. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+#if defined(__ARM_NEON) && __ARM_NEON
+ sljit_u32 exp;
+ sljit_ins ins;
+#endif /* NEON */
+ /* Gives access to the bit pattern of value through u.imm. */
+ union {
+ sljit_u32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+ u.value = value;
+
+#if defined(__ARM_NEON) && __ARM_NEON
+ /* Immediate form is tried only when the low 19 bits are all zero. */
+ if ((u.imm << (32 - 19)) == 0) {
+ /* Bits 25..30 of the pattern. */
+ exp = (u.imm >> (23 + 2)) & 0x3f;
+
+ if (exp == 0x20 || exp == 0x1f) {
+ /* 8 bit immediate: bit 31 of the pattern followed by its bits 19..25.
+    The upper nibble goes to bits 16..19, the lower nibble to bits 0..3.
+    NOTE(review): presumably the VFP "VMOV (immediate)" encoding - confirm. */
+ ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
+ return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
+ }
+ }
+#endif /* NEON */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
+ return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
+}
+
+/* Emits code that loads the 64 bit float constant value into freg.
+   When built with __ARM_NEON and the bit pattern qualifies, a single
+   immediate form instruction is emitted; otherwise the two 32 bit words
+   are loaded into TMP_REG1 / TMP_REG2 and moved to freg with VMOV2.
+   Returns SLJIT_SUCCESS or the error reported by the emitter helpers. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
+ sljit_u32 exp;
+ sljit_ins ins;
+#endif /* NEON */
+ /* Gives access to the two 32 bit words of value.
+    NOTE(review): imm[1] is used as the word holding sign and exponent,
+    i.e. a little endian layout is assumed - confirm. */
+ union {
+ sljit_u32 imm[2];
+ sljit_f64 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
- SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+ u.value = value;
- if (FAST_IS_REG(dst))
- return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
+#if defined(__ARM_NEON) && __ARM_NEON
+ /* Immediate form is tried only when imm[0] and the low 16 bits of imm[1] are zero. */
+ if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
+ /* Bits 22..30 of imm[1]. */
+ exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;
- /* Memory. */
- return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
+ if (exp == 0x100 || exp == 0xff) {
+ /* 8 bit immediate: bit 31 of imm[1] followed by its bits 16..22.
+    NOTE(review): (1 << 8) presumably selects the double precision form - confirm. */
+ ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
+ return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
+ }
+ }
+#endif /* NEON */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
+ /* Identical words: TMP_REG1 serves as both sources, saving one immediate load. */
+ if (u.imm[0] == u.imm[1])
+ return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
+ return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
+}
+
+/* Emits one VMOV (single general purpose register) or VMOV2 (register
+   pair, detected with REG_PAIR_MASK) instruction that combines freg with
+   reg. Bit 20 of the instruction is set only for SLJIT_COPY_FROM_F64.
+   Returns the result of push_inst32. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 freg, sljit_s32 reg)
+{
+	sljit_s32 second;
+	sljit_ins inst;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+	if (!(reg & REG_PAIR_MASK)) {
+		/* Single register form; bit 7 is added unless SLJIT_32 is set in op. */
+		inst = VMOV | VN4(freg) | RT4(reg);
+		inst |= (op & SLJIT_32) ? 0 : (1 << 7);
+	} else {
+		/* Pair form: first register goes to the RN field, second to RT. */
+		second = REG_PAIR_SECOND(reg);
+		reg = REG_PAIR_FIRST(reg);
+
+		inst = VMOV2 | RN4(reg) | RT4(second) | VM4(freg);
+	}
+
+	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
+		inst |= 1 << 20;
+
+	return push_inst32(compiler, inst);
}
/* --------------------------------------------------------------------- */
@@ -2170,15 +2405,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+ case SLJIT_UNORDERED_OR_EQUAL:
return 0x0;
case SLJIT_NOT_EQUAL:
+ case SLJIT_ATOMIC_NOT_STORED:
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+ case SLJIT_ORDERED_NOT_EQUAL:
return 0x1;
case SLJIT_CARRY:
@@ -2453,18 +2690,18 @@ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit
switch (arg_types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (offset != new_offset)
- FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset)));
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset)));
new_offset++;
offset++;
break;
case SLJIT_ARG_TYPE_F32:
if (f32_offset != 0) {
- FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset)));
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset)));
f32_offset = 0;
} else {
if (offset != new_offset)
- FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset)));
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset)));
f32_offset = new_offset;
new_offset++;
}
@@ -2546,7 +2783,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (FAST_IS_REG(src)) {
SLJIT_ASSERT(reg_map[src] != 14);
return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
@@ -2645,8 +2882,8 @@ static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *com
if (FAST_IS_REG(src)) {
if (op & SLJIT_32)
- return push_inst32(compiler, VMOV | (1 << 20) | DN4(src) | RT4(SLJIT_R0));
- return push_inst32(compiler, VMOV2 | (1 << 20) | DM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
+ return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
+ return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
}
SLJIT_SKIP_CHECKS(compiler);
@@ -2711,23 +2948,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
sljit_uw cc, tmp;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (src2_reg != dst_reg && src1 == dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ src2_reg = dst_reg;
+ type ^= 0x1;
+ }
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));
+
+ if (src2_reg != dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ src1 = TMP_REG1;
+ src1w = 0;
+ }
+ } else if (dst_reg != src2_reg)
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));
cc = get_cc(compiler, type & ~SLJIT_32);
- if (!(src & SLJIT_IMM)) {
+ if (src1 != SLJIT_IMM) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src));
+ return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
}
- tmp = (sljit_uw) srcw;
+ tmp = (sljit_uw)src1w;
if (tmp < 0x10000) {
/* set low 16 bits, set hi 16 bits to 0. */
@@ -2736,13 +2997,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
}
- tmp = get_imm((sljit_uw)srcw);
+ tmp = get_imm((sljit_uw)src1w);
if (tmp != INVALID_IMM) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
}
- tmp = get_imm(~(sljit_uw)srcw);
+ tmp = get_imm(~(sljit_uw)src1w);
if (tmp != INVALID_IMM) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
@@ -2750,13 +3011,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));
- tmp = (sljit_uw) srcw;
+ tmp = (sljit_uw)src1w;
FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
return push_inst32(compiler, MOVT | RD4(dst_reg)
| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{ /* Emits a conditional floating point select: dst_freg is first made
+ * to hold one candidate, then an IT block followed by a VMOV_F32
+ * overwrites it with the other candidate when the condition holds.
+ * src1 may be a register or a memory operand; src2_freg is a register.
+ */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ type ^= SLJIT_32; /* flipped once here; (type & SLJIT_32) is OR-ed into the instructions below */
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) { /* dst already holds src1: swap the roles and invert the condition */
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
+ }
+
+ if (src1 & SLJIT_MEM) { /* memory operand is loaded into TMP_FREG1 first */
+ FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
+ src1 = TMP_FREG1;
+ }
+
+ FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
+ return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
@@ -2770,7 +3061,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
- if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)) {
+ if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
if ((mem & REG_MASK) == 0) {
if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));
@@ -2781,7 +3072,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
imm = get_imm((sljit_uw)(memw & ~0xfff));
if (imm != INVALID_IMM)
- memw &= 0xff;
+ memw &= 0xfff;
}
if (imm == INVALID_IMM) {
@@ -3058,11 +3349,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
- if (type & SLJIT_MEM_UNALIGNED_32)
+ if (type & SLJIT_MEM_ALIGNED_32)
return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);
if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | RT4(TMP_REG2)));
+ FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));
if (type & SLJIT_32)
return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);
@@ -3071,13 +3362,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
mem |= SLJIT_MEM;
FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
- FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | 0x80 | RT4(TMP_REG2)));
+ FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
}
if (type & SLJIT_32) {
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
- return push_inst32(compiler, VMOV | DN4(freg) | RT4(TMP_REG2));
+ return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
}
FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
@@ -3085,11 +3376,715 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
- return push_inst32(compiler, VMOV2 | DM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
+ return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
+}
+
+static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
+{ /* Reduces the memory operand (*mem_ptr, memw) to a single register
+ * holding the address and stores that register back into *mem_ptr.
+ * The result is either the original base register (when memw is 0)
+ * or TMP_REG1. Returns SLJIT_SUCCESS or the error of the emitter used.
+ */
+ sljit_uw imm;
+ sljit_s32 mem = *mem_ptr;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { /* base + index form; memw & 0x3 is placed at bit 6 of the ADD_W */
+ *mem_ptr = TMP_REG1;
+ return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6));
+ }
+
+ if (SLJIT_UNLIKELY(!(mem & REG_MASK))) { /* no base register: memw alone is loaded */
+ *mem_ptr = TMP_REG1;
+ return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
+ }
+
+ mem &= REG_MASK;
+
+ if (memw == 0) { /* base register can be used directly, nothing is emitted */
+ *mem_ptr = mem;
+ return SLJIT_SUCCESS;
+ }
+
+ *mem_ptr = TMP_REG1;
+ imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw)); /* NOTE(review): -memw overflows for the minimum sljit_sw value - confirm callers never pass it */
+
+ if (imm != INVALID_IMM)
+ return push_inst32(compiler, ((memw < 0) ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(mem) | imm);
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); /* offset not encodable: load it, then add the base */
+ return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem));
+}
+
+static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
+{ /* Maps freg to the register index used for 128-bit operations:
+ * freg is rounded up to an even value, and values not above
+ * SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS are then decremented by one.
+ * The assert ties the parity of freg_map[freg] to that same range test.
+ */
+ freg += freg & 0x1; /* odd -> next even, even unchanged */
+
+ SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
+
+ if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
+ freg--;
+
+ return freg;
+}
+
+#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) /* Evaluates to +1 for an odd freg and to -1 for an even freg; callers add it to freg. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{ /* Emits a whole-register SIMD move between freg and srcdst, which is
+ * either another register (VORR) or memory (VLD1 / VST1).
+ * SLJIT_SIMD_STORE in type selects the freg -> srcdst direction.
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, or for
+ * float types with an element size outside 2..3. With SLJIT_SIMD_TEST
+ * nothing is emitted and SLJIT_SUCCESS is returned.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); /* the ELEM2 field of type is used as the alignment here */
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (!(srcdst & SLJIT_MEM)) { /* register to register: VORR with the same source in both operand fields */
+ if (reg_size == 4)
+ srcdst = simd_get_quad_reg_index(srcdst);
+
+ if (type & SLJIT_SIMD_STORE)
+ ins = VD4(srcdst) | VN4(freg) | VM4(freg);
+ else
+ ins = VD4(freg) | VN4(srcdst) | VM4(srcdst);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst32(compiler, VORR | ins);
+ }
+
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); /* srcdst now names a single address register */
+
+ if (elem_size > 3)
+ elem_size = 3;
+
+ ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg)
+ | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
+
+ SLJIT_ASSERT(reg_size >= alignment);
+
+ if (alignment == 3) /* alignment value 3 sets 0x10, values >= 4 set 0x20, smaller values set nothing */
+ ins |= 0x10;
+ else if (alignment >= 4)
+ ins |= 0x20;
+
+ return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
+}
+
+static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
+{ /* Tries to express 'value' (an element of size class elem_size) as
+ * instruction immediate bits. Returns the bits to OR into the
+ * instruction, or ~(sljit_ins)0 when no form matches. Each of the two
+ * loops runs at most twice: once with the value as given and, if that
+ * fails, once with its bits inverted and bit 5 set in the result.
+ */
+ sljit_ins result;
+
+ if (elem_size > 1 && (sljit_u16)value == (value >> 16)) { /* both 16-bit halves are equal: treat as a 16-bit element */
+ elem_size = 1;
+ value = (sljit_u16)value;
+ }
+
+ if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { /* both bytes are equal: treat as an 8-bit element */
+ elem_size = 0;
+ value = (sljit_u8)value;
+ }
+
+ switch (elem_size) {
+ case 0:
+ SLJIT_ASSERT(value <= 0xff);
+ result = 0xe00;
+ break;
+ case 1:
+ SLJIT_ASSERT(value <= 0xffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) { /* only the low byte is used */
+ result |= 0x800;
+ break;
+ }
+
+ if ((value & 0xff) == 0) { /* only the high byte is used */
+ value >>= 8;
+ result |= 0xa00;
+ break;
+ }
+
+ if (result != 0) /* second pass (inverted value) failed as well */
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffff; /* retry with the 16 bits inverted */
+ result = (1 << 5);
+ }
+ break;
+ default:
+ SLJIT_ASSERT(value <= 0xffffffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) { /* byte 0 only */
+ result |= 0x000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff00) == 0) { /* byte 1 only */
+ value >>= 8;
+ result |= 0x200;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff0000) == 0) { /* byte 2 only */
+ value >>= 16;
+ result |= 0x400;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff000000) == 0) { /* byte 3 only */
+ value >>= 24;
+ result |= 0x600;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { /* byte 1 with byte 0 all ones */
+ value >>= 8;
+ result |= 0xc00;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { /* byte 2 with bytes 0..1 all ones */
+ value >>= 16;
+ result |= 0xd00;
+ break;
+ }
+
+ if (result != 0) /* second pass (inverted value) failed as well */
+ return ~(sljit_ins)0;
+
+ value = ~value; /* retry with all bits inverted */
+ result = (1 << 5);
+ }
+ break;
+ }
+
+ return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result; /* value bits 0..3 stay, bits 4..6 move to 16..18, bit 7 moves to bit 28 */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{ /* Emits code that fills every lane of freg with one scalar taken from
+ * src, which may be an immediate, a register or a memory operand.
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, for float
+ * types with an element size outside 2..3, and for integer types with
+ * an element size above 2. With SLJIT_SIMD_TEST nothing is emitted.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imm;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (src == SLJIT_IMM && srcw == 0) /* zero immediate: one VMOV_i with no immediate bits set */
+ return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));
+
+ if (SLJIT_UNLIKELY(elem_size == 3)) { /* NOTE(review): this path always touches the other half register; presumably reg_size is 4 here - confirm */
+ SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
+ src = freg;
+ } else if (freg != src)
+ FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg); /* move on to the other half and copy the same value there */
+
+ if (freg != src)
+ return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
+ return SLJIT_SUCCESS;
+ }
+
+ if (src & SLJIT_MEM) { /* memory source: address is reduced to one register, then VLD1_r is used */
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ ins = (sljit_ins)(elem_size << 6);
+
+ if (reg_size == 4)
+ ins |= 1 << 5;
+
+ return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf);
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) { /* float register source: VDUP_s built from freg_map / freg_ebit_map of src */
+ SLJIT_ASSERT(elem_size == 2);
+ ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
+ }
+
+ if (src == SLJIT_IMM) {
+ if (elem_size < 2)
+ srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; /* keep only the low 8 or 16 bits */
+
+ imm = simd_get_imm(elem_size, (sljit_uw)srcw);
+
+ if (imm != ~(sljit_ins)0) { /* encodable: single VMOV_i */
+ if (reg_size == 4)
+ imm |= (sljit_ins)1 << 6;
+
+ return push_inst32(compiler, VMOV_i | imm | VD4(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); /* not encodable: go through TMP_REG1 and VDUP below */
+ src = TMP_REG1;
+ }
+
+ switch (elem_size) { /* element size selection bits for VDUP */
+ case 0:
+ ins = 1 << 22;
+ break;
+ case 1:
+ ins = 1 << 5;
+ break;
+ default:
+ ins = 0;
+ break;
+ }
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 21;
+
+ return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{ /* Emits code that moves one lane (lane_index) of freg to or from
+ * srcdst, which may be a register, a memory operand or an immediate.
+ * SLJIT_SIMD_STORE selects the lane -> srcdst direction. When
+ * SLJIT_SIMD_LANE_ZERO is set, a VMOV_i that clears the register is
+ * emitted before the lane is written.
+ * Unsupported combinations are the same as in sljit_emit_simd_replicate.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { /* 64-bit lane from a register: copy into the chosen half, clear the other half */
+ if (lane_index == 1)
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (srcdst != freg)
+ FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VMOV_i | VD4(freg));
+ }
+
+ if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { /* source overlaps the register about to be cleared: save it in TMP_FREG2 */
+ FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg)));
+ srcdst = TMP_FREG2;
+ srcdstw = 0;
+ }
+ }
+
+ FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg))); /* clear the destination register */
+ }
+
+ if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { /* lane lies in the other 64-bit half: rebase index and register */
+ lane_index -= (0x8 >> elem_size);
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ if (elem_size == 3) /* a 64-bit lane is transferred by emit_fop_mem */
+ return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);
+
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ lane_index = lane_index << elem_size; /* scale the lane index by the element size */
+ ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
+ return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf);
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3) {
+ if (type & SLJIT_SIMD_STORE)
+ return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg));
+ return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst));
+ }
+
+ if (type & SLJIT_SIMD_STORE) {
+ if (freg_ebit_map[freg] == 0) { /* direct VMOV_F32 is possible when the extra register bit of freg is 0 */
+ if (lane_index == 1)
+ freg = SLJIT_F64_SECOND(freg);
+
+ return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
+ }
+
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1))); /* otherwise go through TMP_REG1 */
+ return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
+ }
+
+ FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1))); /* float load into a lane also goes through TMP_REG1 */
+ return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size < 2)
+ srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; /* keep only the low 8 or 16 bits */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
+ srcdst = TMP_REG1;
+ }
+
+ if (elem_size == 0) /* element size selection bits for VMOV_s */
+ ins = 0x400000;
+ else if (elem_size == 1)
+ ins = 0x20;
+ else
+ ins = 0;
+
+ lane_index = lane_index << elem_size;
+ ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5)); /* scaled index: bit 2 goes to bit 21, bits 0..1 go to bits 5..6 */
+
+ if (type & SLJIT_SIMD_STORE) {
+ ins |= (1 << 20);
+
+ if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED)) /* bit 23 is set for 8/16-bit reads without SLJIT_SIMD_LANE_SIGNED */
+ ins |= (1 << 23);
+ }
+
+ return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{ /* Emits code that copies lane src_lane_index of the SIMD register src
+ * into every lane of freg. Returns SLJIT_ERR_UNSUPPORTED unless
+ * reg_size is 3 or 4, or for float types with an element size outside
+ * 2..3. With SLJIT_SIMD_TEST nothing is emitted.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ src = simd_get_quad_reg_index(src);
+
+ if (src_lane_index >= (0x8 >> elem_size)) { /* lane lies in the other 64-bit half of src */
+ src_lane_index -= (0x8 >> elem_size);
+ src += SLJIT_QUAD_OTHER_HALF(src);
+ }
+ }
+
+ if (elem_size == 3) { /* 64-bit element: copy src into both halves with VORR, skipping a half that already is src */
+ if (freg != src)
+ FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (freg != src)
+ return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
+ return SLJIT_SUCCESS;
+ }
+
+ ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size)); /* lane index followed by a marker bit, placed at bit 16 + elem_size */
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 6;
+
+ return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{ /* Emits code that widens packed elements of size elem_size from src
+ * (register or memory) to size elem2_size in freg. Integer types use
+ * a chain of VSHLL instructions, the float case uses VCVT_F64_F32.
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, and for
+ * float types unless elem_size is 2 and elem2_size is 3.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_s32 dst_reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (src & SLJIT_MEM) { /* memory source is first loaded into freg, which then acts as the source */
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
+ else
+ FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
+ src = freg;
+ } else if (reg_size == 4)
+ src = simd_get_quad_reg_index(src);
+
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ dst_reg = (reg_size == 4) ? freg : TMP_FREG2; /* the 64-bit case widens into TMP_FREG2 and copies back below */
+
+ do { /* one VSHLL per size step from elem_size up to elem2_size; bit 28 is set unless SLJIT_SIMD_EXTEND_SIGNED */
+ FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
+ | ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
+ src = dst_reg;
+ } while (++elem_size < elem2_size);
+
+ if (dst_reg == TMP_FREG2)
+ return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
+ return SLJIT_SUCCESS;
+ }
+
+ /* No SIMD variant, must use VFP instead. */
+ SLJIT_ASSERT(reg_size == 4);
+
+ if (freg == src) { /* in-place: the other half is written first, presumably so src is still intact for the second conversion - confirm */
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
+ }
+
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{ /* Emits a sequence that derives a word from freg through TMP_FREG2
+ * (VSHR, optional VMOVN, a run of VSRA steps) and delivers it to dst,
+ * either directly in a register or through TMP_REG1 plus a store.
+ * Presumably this gathers the per-lane sign bits into a bit mask -
+ * confirm against the API description.
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, or for
+ * float types with an element size outside 2..3.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) { /* imms packs one VSRA immediate per byte, consumed from the low byte upwards below */
+ case 0:
+ imms = 0x243219;
+ ins = VSHR | (1 << 28) | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x243219 : 0x2231;
+ ins = VSHR | (1 << 28) | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x2231 : 0x21;
+ ins = VSHR | (1 << 28) | (0x21 << 16);
+ break;
+ default:
+ imms = 0x21;
+ ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
+ break;
+ }
+
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ ins |= (sljit_ins)1 << 6;
+ }
+
+ SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
+ FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg))); /* shifted copy of freg goes to TMP_FREG2; freg itself is not written */
+
+ if (reg_size == 4 && elem_size > 0) /* 128-bit input with elements wider than a byte gets a VMOVN step */
+ FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
+
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
+
+ while (imms >= 0x100) { /* one VSRA per remaining byte of imms except the last */
+ FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
+ imms >>= 8;
+ }
+
+ FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); /* final VSRA additionally sets bit 7 */
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));
+
+ if (reg_size == 4 && elem_size == 0) { /* 128-bit byte case: a second part is read from TMP_FREG1 and OR-ed into dst_r */
+ SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
+ FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
+ }
+
+ if (dst_r == TMP_REG1) /* dst was not a plain register: store the result */
+ return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{ /* Emits one bitwise SIMD instruction (VAND, VORR or VEOR, chosen by
+ * the opcode in type) computing dst_freg from src1_freg and src2_freg.
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, or for
+ * float types with an element size outside 2..3. With SLJIT_SIMD_TEST
+ * nothing is emitted.
+ */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) { /* NOTE(review): no default case, ins stays 0 for any other opcode - presumably rejected by the check above, confirm */
+ case SLJIT_SIMD_OP2_AND:
+ ins = VAND;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ ins = VORR;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ ins = VEOR;
+ break;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4) { /* 128-bit form: remap all three registers and set bit 6 */
+ dst_freg = simd_get_quad_reg_index(dst_freg);
+ src1_freg = simd_get_quad_reg_index(src1_freg);
+ src2_freg = simd_get_quad_reg_index(src2_freg);
+ ins |= (sljit_ins)1 << 6;
+ }
+
+ return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
}
#undef FPU_LOAD
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{ /* Emits a single LDREX-family instruction that loads from the address
+ * held in mem_reg into dst_reg. The opcode in op picks the width:
+ * SLJIT_MOV_U8 -> LDREXB, SLJIT_MOV_U16 -> LDREXH, anything else -> LDREX.
+ */
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV_U8:
+ ins = LDREXB;
+ break;
+ case SLJIT_MOV_U16:
+ ins = LDREXH;
+ break;
+ default:
+ ins = LDREX;
+ break;
+ }
+
+ return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{ /* Emits a STREX-family instruction that stores src_reg to the address
+ * held in mem_reg, with TMP_REG1 encoded as the additional register
+ * operand. When SLJIT_SET_ATOMIC_STORED is set in op, a CMPI_W on
+ * TMP_REG1 follows. temp_reg is only passed to the argument check.
+ */
+ sljit_ins ins;
+
+ /* temp_reg == mem_reg is undefined so use another temp register */
+ SLJIT_UNUSED_ARG(temp_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+ switch (GET_OPCODE(op)) { /* the byte / halfword forms put TMP_REG1 in the RM field, the word form in RD */
+ case SLJIT_MOV_U8:
+ ins = STREXB | RM4(TMP_REG1);
+ break;
+ case SLJIT_MOV_U16:
+ ins = STREXH | RM4(TMP_REG1);
+ break;
+ default:
+ ins = STREX | RD4(TMP_REG1);
+ break;
+ }
+
+ FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ return push_inst32(compiler, CMPI_W | RN4(TMP_REG1)); /* compare with no immediate bits set, i.e. against 0 if the macro carries none - confirm */
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
index e6853c98f6..9620b945f6 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
@@ -26,6 +26,49 @@
/* mips 32-bit arch dependent functions. */
+static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_sw src1, sljit_sw src2, sljit_sw dst)
+{ /* Emits a copysign sequence using integer operations: one word of
+ * src1 and of src2 is moved to TMP_REG1 / TMP_REG2, bit 31 of their
+ * XOR is isolated and XOR-ed back into the src1 word, and the result
+ * is moved into dst. For doubles the word is taken either with
+ * MFHC1 / MTHC1 or from the register selected by OR-ing (1 << 11)
+ * into the FS field, depending on CPU_FEATURE_FR.
+ */
+ int is_32 = (op & SLJIT_32);
+ sljit_ins mfhc = MFC1, mthc = MTC1;
+ sljit_ins src1_r = FS(src1), src2_r = FS(src2), dst_r = FS(dst);
+
+ if (!is_32) {
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ mfhc = MFHC1;
+ mthc = MTHC1;
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ src1_r |= (1 << 11);
+ src2_r |= (1 << 11);
+ dst_r |= (1 << 11);
+ break;
+ }
+ }
+
+ FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG1) | src1_r, DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG2) | src2_r, DR(TMP_REG2)));
+ if (!is_32 && src1 != dst) /* for doubles, a MOV_fmt(FMT_S) copy of src1 into dst is emitted before the sign word is replaced */
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(src1) | FD(dst), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ else
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, XOR | T(TMP_REG1) | D(TMP_REG2) | S(TMP_REG2), DR(TMP_REG2))); /* TMP_REG2 = TMP_REG1 ^ TMP_REG2 */
+ FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); /* shift right then left by 31 keeps only bit 31 */
+ FAIL_IF(push_inst(compiler, SLL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, XOR | T(TMP_REG2) | D(TMP_REG1) | S(TMP_REG1), DR(TMP_REG1))); /* flips bit 31 of TMP_REG1 when the two signs differ */
+ FAIL_IF(push_inst(compiler, mthc | T(TMP_REG1) | dst_r, MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (mthc == MTC1)
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
if (!(imm & ~0xffff))
@@ -44,6 +87,108 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{ /* Emits code that sets freg to the 64-bit floating point constant
+ * 'value'. The two 32-bit words are loaded into TMP_REG1 (low) and
+ * TMP_REG2 (high) and moved over with MTC1 / MTHC1; a word that is 0
+ * is not loaded and TA(0) is encoded in its place.
+ */
+ union {
+ struct {
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ sljit_s32 lo;
+ sljit_s32 hi;
+#else /* !SLJIT_LITTLE_ENDIAN */
+ sljit_s32 hi;
+ sljit_s32 lo;
+#endif /* SLJIT_LITTLE_ENDIAN */
+ } bin;
+ sljit_f64 value;
+ } u; /* member order follows SLJIT_LITTLE_ENDIAN so lo / hi name the right words */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.bin.lo != 0)
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.bin.lo));
+ if (u.bin.hi != 0)
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG2), u.bin.hi));
+
+ FAIL_IF(push_inst(compiler, MTC1 | (u.bin.lo != 0 ? T(TMP_REG1) : TA(0)) | FS(freg), MOVABLE_INS)); /* low word */
+ switch (cpu_feature_list & CPU_FEATURE_FR) { /* high word: MTHC1 when CPU_FEATURE_FR is set, otherwise MTC1 with (1 << 11) OR-ed into the FS field */
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ return push_inst(compiler, MTHC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg), MOVABLE_INS);
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg) | (1 << 11), MOVABLE_INS));
+ break;
+ }
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{ /* Emits code that copies between the floating point register freg
+ * and a core register or register pair. SLJIT_COPY_TO_F64 selects the
+ * core -> float direction (MTC1 / MTHC1); otherwise MFC1 / MFHC1 is
+ * used. With a register pair two moves are emitted: the second
+ * register of the pair is transferred first, then the first one.
+ */
+ sljit_s32 reg2 = 0;
+ sljit_ins inst = FS(freg);
+ sljit_ins mthc = MTC1, mfhc = MFC1;
+ int is_32 = (op & SLJIT_32);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ op = GET_OPCODE(op);
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_SECOND(reg);
+ reg = REG_PAIR_FIRST(reg);
+
+ inst |= T(reg2);
+
+ if (op == SLJIT_COPY_TO_F64)
+ FAIL_IF(push_inst(compiler, MTC1 | inst, MOVABLE_INS));
+ else
+ FAIL_IF(push_inst(compiler, MFC1 | inst, DR(reg2)));
+
+ inst = FS(freg) | (1 << 11); /* default for the second move; replaced below when CPU_FEATURE_FR is set */
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ if (cpu_feature_list & CPU_FEATURE_FR) {
+ mthc = MTHC1;
+ mfhc = MFHC1;
+ inst = FS(freg);
+ }
+#endif /* SLJIT_MIPS_REV >= 2 */
+ }
+
+ inst |= T(reg);
+ if (!is_32 && !reg2) { /* 64-bit copy with a single core register: same selection as for the second move of a pair */
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ mthc = MTHC1;
+ mfhc = MFHC1;
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ inst |= (1 << 11);
+ break;
+ }
+ }
+
+ if (op == SLJIT_COPY_TO_F64)
+ FAIL_IF(push_inst(compiler, mthc | inst, MOVABLE_INS));
+ else
+ FAIL_IF(push_inst(compiler, mfhc | inst, DR(reg)));
+
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (mthc == MTC1 || mfhc == MFC1) /* a NOP follows the plain MTC1 / MFC1 forms on these revisions */
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins *)addr;
@@ -74,6 +219,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins ins = NOP;
sljit_u8 offsets[4];
sljit_u8 *offsets_ptr = offsets;
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ sljit_ins f64_hi = TA(7), f64_lo = TA(6);
+#else
+ sljit_ins f64_hi = TA(6), f64_lo = TA(7);
+#endif /* SLJIT_LITTLE_ENDIAN */
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
@@ -138,20 +288,28 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
- if (*offsets_ptr < 4 * sizeof (sljit_sw)) {
+ if (*offsets_ptr < 4 * sizeof(sljit_sw)) {
if (prev_ins != NOP)
FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
/* Must be preceded by at least one other argument,
* and its starting offset must be 8 because of alignment. */
SLJIT_ASSERT((*offsets_ptr >> 2) == 2);
-
- prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11);
- ins = MFC1 | TA(7) | FS(float_arg_count);
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ prev_ins = MFHC1 | f64_hi | FS(float_arg_count);
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ prev_ins = MFC1 | f64_hi | FS(float_arg_count) | (1 << 11);
+ break;
+ }
+ ins = MFC1 | f64_lo | FS(float_arg_count);
} else if (*offsets_ptr < 254)
ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
- ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
@@ -161,7 +319,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
else if (*offsets_ptr < 254)
ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
- ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
@@ -285,7 +443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
index d2a5924f8e..52a0d3fb7a 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
@@ -26,6 +26,23 @@
/* mips 64-bit arch dependent functions. */
+static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src1, sljit_s32 src2, sljit_s32 dst)
+{ /* Writes to float register dst the bits of src1 with its sign bit replaced
+ * by the sign bit of src2, using TMP_REG1 and TMP_REG2 as integer scratch.
+ * SELECT_OP picks the 64-bit instruction unless op carries SLJIT_32.
+ * Returns SLJIT_SUCCESS, or leaves early when push_inst fails.
+ */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG1) | FS(src1), DR(TMP_REG1))); /* TMP_REG1 = bits of src1 */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG2) | FS(src2), DR(TMP_REG2))); /* TMP_REG2 = bits of src2 */
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG2) | T(TMP_REG1) | D(TMP_REG2), DR(TMP_REG2))); /* TMP_REG2 = src1 ^ src2 */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); /* keep only the top bit: 1 when the signs differ */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); /* move that bit back to the sign position */
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1))); /* flip the sign of src1 when the signs differ */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | T(TMP_REG1) | FS(dst), MOVABLE_INS)); /* dst = combined bits */
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (!(op & SLJIT_32)) /* only the 64-bit form gets the trailing NOP on these revisions */
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
sljit_s32 shift = 32;
@@ -128,6 +145,57 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+	sljit_s32 freg, sljit_f64 value)
+{
+	/* The union exposes the bit pattern of the double as a machine word
+	 * (assumes sljit_sw is as wide as sljit_f64 on this target). */
+	union {
+		sljit_sw imm;
+		sljit_f64 value;
+	} bits;
+	sljit_ins move_ins;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+	bits.value = value;
+
+	if (bits.imm != 0) {
+		/* Non-zero pattern: materialize it in TMP_REG1 first. */
+		FAIL_IF(load_immediate(compiler, DR(TMP_REG1), bits.imm));
+		move_ins = DMTC1 | T(TMP_REG1) | FS(freg);
+	} else {
+		/* All-zero pattern: move straight from absolute register 0. */
+		move_ins = DMTC1 | TA(0) | FS(freg);
+	}
+
+	FAIL_IF(push_inst(compiler, move_ins, MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+	/* Older revisions get a NOP right after the move. */
+	FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+	return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 freg, sljit_s32 reg)
+{
+	/* MIPS64: one move instruction between general register reg and float
+	 * register freg; SELECT_OP chooses the 32-bit form when op has SLJIT_32. */
+	sljit_ins operands;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+	operands = T(reg) | FS(freg);
+
+	if (GET_OPCODE(op) != SLJIT_COPY_TO_F64)
+		/* Float to general register. */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | operands, DR(reg)));
+	else
+		/* General to float register. */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | operands, MOVABLE_INS));
+
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+	/* Only the 64-bit form is followed by a NOP on these revisions. */
+	if ((op & SLJIT_32) == 0)
+		return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+	return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins *)addr;
@@ -183,17 +251,17 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (arg_count != float_arg_count)
- ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
+ ins = MOV_fmt(FMT_D) | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
- ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
case SLJIT_ARG_TYPE_F32:
if (arg_count != float_arg_count)
- ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
+ ins = MOV_fmt(FMT_S) | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
- ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
@@ -300,7 +368,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
index 9afe901c38..807b3474ea 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
@@ -26,9 +26,12 @@
/* Latest MIPS architecture. */
-#ifndef __mips_hard_float
+#ifdef HAVE_PRCTL
+#include <sys/prctl.h>
+#endif
+
+#if !defined(__mips_hard_float) || defined(__mips_single_float)
/* Disable automatic detection, covers both -msoft-float and -mno-float */
-#undef SLJIT_IS_FPU_AVAILABLE
#define SLJIT_IS_FPU_AVAILABLE 0
#endif
@@ -42,6 +45,14 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
return "MIPS64-R6" SLJIT_CPUINFO;
#endif /* SLJIT_CONFIG_MIPS_32 */
+#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 5)
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+ return "MIPS32-R5" SLJIT_CPUINFO;
+#else /* !SLJIT_CONFIG_MIPS_32 */
+ return "MIPS64-R5" SLJIT_CPUINFO;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+
#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -83,27 +94,31 @@ typedef sljit_u32 sljit_ins;
#define EQUAL_FLAG 3
#define OTHER_FLAG 1
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+ 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31, 3, 1
+};
+
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3)
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
-};
-
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
- 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16
+static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) << 1) + 1] = {
+ 0,
+ 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20,
+ 12, 10, 16,
+ 1, 15, 3, 5, 7, 9, 19, 31, 29, 27, 25, 23, 21,
+ 13, 11, 17
};
-#else
+#else /* !SLJIT_CONFIG_MIPS_32 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10
};
-#endif
+#endif /* SLJIT_CONFIG_MIPS_32 */
/* --------------------------------------------------------------------- */
/* Instrucion forms */
@@ -200,10 +215,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define DMULTU (HI(0) | LO(29))
#endif /* SLJIT_MIPS_REV >= 6 */
#define DIV_S (HI(17) | FMT_S | LO(3))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define DINSU (HI(31) | LO(6))
+#endif /* SLJIT_MIPS_REV >= 2 */
+#define DMFC1 (HI(17) | (1 << 21))
+#define DMTC1 (HI(17) | (5 << 21))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define DROTR (HI(0) | (1 << 21) | LO(58))
#define DROTR32 (HI(0) | (1 << 21) | LO(62))
#define DROTRV (HI(0) | (1 << 6) | LO(22))
+#define DSBH (HI(31) | (2 << 6) | LO(36))
+#define DSHD (HI(31) | (5 << 6) | LO(36))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define DSLL (HI(0) | LO(56))
#define DSLL32 (HI(0) | LO(60))
#define DSLLV (HI(0) | LO(20))
@@ -232,6 +255,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define LWR (HI(38))
#define LWC1 (HI(49))
#define MFC1 (HI(17))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define MFHC1 (HI(17) | (3 << 21))
+#endif /* SLJIT_MIPS_REV >= 2 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MOD (HI(0) | (3 << 6) | LO(26))
#define MODU (HI(0) | (3 << 6) | LO(27))
@@ -239,8 +265,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define MFHI (HI(0) | LO(16))
#define MFLO (HI(0) | LO(18))
#endif /* SLJIT_MIPS_REV >= 6 */
-#define MOV_S (HI(17) | FMT_S | LO(6))
#define MTC1 (HI(17) | (4 << 21))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define MTHC1 (HI(17) | (7 << 21))
+#endif /* SLJIT_MIPS_REV >= 2 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MUH (HI(0) | (3 << 6) | LO(24))
#define MUHU (HI(0) | (3 << 6) | LO(25))
@@ -256,8 +284,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define NOR (HI(0) | LO(39))
#define OR (HI(0) | LO(37))
#define ORI (HI(13))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define ROTR (HI(0) | (1 << 21) | LO(2))
#define ROTRV (HI(0) | (1 << 6) | LO(6))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define SD (HI(63))
#define SDL (HI(44))
#define SDR (HI(45))
@@ -279,6 +309,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define SWR (HI(46))
#define SWC1 (HI(57))
#define TRUNC_W_S (HI(17) | FMT_S | LO(13))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define WSBH (HI(31) | (2 << 6) | LO(32))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
@@ -289,15 +322,21 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#else /* SLJIT_MIPS_REV < 6 */
#define DCLZ (HI(28) | LO(36))
#define MOVF (HI(0) | (0 << 16) | LO(1))
+#define MOVF_S (HI(17) | FMT_S | (0 << 16) | LO(17))
#define MOVN (HI(0) | LO(11))
+#define MOVN_S (HI(17) | FMT_S | LO(19))
#define MOVT (HI(0) | (1 << 16) | LO(1))
+#define MOVT_S (HI(17) | FMT_S | (1 << 16) | LO(17))
#define MOVZ (HI(0) | LO(10))
+#define MOVZ_S (HI(17) | FMT_S | LO(18))
#define MUL (HI(28) | LO(2))
#endif /* SLJIT_MIPS_REV >= 6 */
#define PREF (HI(51))
#define PREFX (HI(19) | LO(15))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define SEB (HI(31) | (16 << 6) | LO(32))
#define SEH (HI(31) | (24 << 6) | LO(32))
+#endif /* SLJIT_MIPS_REV >= 2 */
#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -318,10 +357,107 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define LOAD_W LD
#endif
+#define MOV_fmt(f) (HI(17) | f | LO(6))
+
#define SIMM_MAX (0x7fff)
#define SIMM_MIN (-0x8000)
#define UIMM_MAX (0xffff)
+#define CPU_FEATURE_DETECTED (1 << 0)
+#define CPU_FEATURE_FPU (1 << 1)
+#define CPU_FEATURE_FP64 (1 << 2)
+#define CPU_FEATURE_FR (1 << 3)
+
+static sljit_u32 cpu_feature_list = 0;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ && (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+	/* Argument-check helper: returns 1 when fr names a float register that is
+	 * usable in the current context (scratch, saved or temporary), else 0.
+	 * A scratches value of -1 always yields 0. */
+	if (compiler->scratches == -1)
+		return 0;
+
+	/* For 32-bit operations, map a "second half" register name back onto
+	 * the base register range before the range checks. */
+	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
+		fr -= SLJIT_F64_SECOND(0);
+
+	/* Scratch float registers. */
+	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+		return 1;
+
+	/* Saved float registers, counted downwards from SLJIT_FS0. */
+	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
+		return 1;
+
+	/* Temporary float registers. */
+	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
+}
+
+#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */
+
+/* Probes the floating point configuration once and stores the result in
+ * cpu_feature_list. CPU_FEATURE_DETECTED is always set, so a non-zero
+ * cpu_feature_list tells callers that the probe has already run.
+ *
+ * CPU_FEATURE_FPU and CPU_FEATURE_FP64 come from SLJIT_IS_FPU_AVAILABLE when
+ * it is defined, otherwise (GNU C only) from the FIR register read with cfc1.
+ * CPU_FEATURE_FR is only considered when both of those are present.
+ */
+static void get_cpu_features(void)
+{
+#if !defined(SLJIT_IS_FPU_AVAILABLE) && defined(__GNUC__)
+	sljit_u32 fir = 0;
+#endif /* !SLJIT_IS_FPU_AVAILABLE && __GNUC__ */
+	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
+
+#if defined(SLJIT_IS_FPU_AVAILABLE)
+#if SLJIT_IS_FPU_AVAILABLE
+	feature_list |= CPU_FEATURE_FPU;
+#if SLJIT_IS_FPU_AVAILABLE == 64
+	feature_list |= CPU_FEATURE_FP64;
+#endif /* SLJIT_IS_FPU_AVAILABLE == 64 */
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+#elif defined(__GNUC__)
+	__asm__ ("cfc1 %0, $0" : "=r"(fir));
+	/* FIR bits 16 and 17: single and double precision are implemented. */
+	if ((fir & (0x3 << 16)) == (0x3 << 16))
+		feature_list |= CPU_FEATURE_FPU;
+
+#if (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
+	&& (!defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV < 2)
+	/* FIR bit 22 is not consulted before release 2: a 64-bit CPU with an
+	 * FPU is taken to have 64-bit float registers. */
+	if ((feature_list & CPU_FEATURE_FPU))
+		feature_list |= CPU_FEATURE_FP64;
+#else /* SLJIT_CONFIG_MIPS_32 || SLJIT_MIPS_REV >= 2 */
+	/* FIR bit 22 (F64): the float registers are 64 bits wide. */
+	if ((fir & (1 << 22)))
+		feature_list |= CPU_FEATURE_FP64;
+#endif /* SLJIT_CONFIG_MIPS_64 && SLJIT_MIPS_REV < 2 */
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+
+	if ((feature_list & CPU_FEATURE_FPU) && (feature_list & CPU_FEATURE_FP64)) {
+#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 6
+		feature_list |= CPU_FEATURE_FR;
+#elif defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 0
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 5
+		feature_list |= CPU_FEATURE_FR;
+#endif /* SLJIT_MIPS_REV >= 5 */
+#else
+		/* -1 means the FR mode is still unknown. */
+		sljit_s32 flag = -1;
+#ifdef PR_GET_FP_MODE
+		/* Ask the kernel for the FP mode of this process. The guard used
+		 * to test the misspelled FR_GET_FP_MODE, which is never defined,
+		 * so this query was never compiled in. */
+		flag = prctl(PR_GET_FP_MODE);
+
+		if (flag > 0)
+			feature_list |= CPU_FEATURE_FR;
+#endif /* PR_GET_FP_MODE */
+#if ((defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 2) \
+	|| (!defined(PR_GET_FP_MODE) && (!defined(SLJIT_DETECT_FR) || SLJIT_DETECT_FR >= 1))) \
+	&& (defined(__GNUC__) && (defined(__mips) && __mips >= 2))
+		if (flag < 0) {
+			/* Declared here so it exists in every configuration that
+			 * compiles the probe below. */
+			sljit_f64 zero = 0.0;
+
+			/* $f17 receives flag (-1), then a 64-bit zero is loaded
+			 * into $f16. With FR=0 that load also overwrites $f17,
+			 * with FR=1 it does not, so flag stays non-zero only
+			 * when FR=1. */
+			__asm__ (".set oddspreg\n"
+				"lwc1 $f17, %0\n"
+				"ldc1 $f16, %1\n"
+				"swc1 $f17, %0\n"
+				: "+m" (flag) : "m" (zero) : "$f16", "$f17");
+			if (flag)
+				feature_list |= CPU_FEATURE_FR;
+		}
+#endif /* (!PR_GET_FP_MODE || (PR_GET_FP_MODE && SLJIT_DETECT_FR == 2)) && __GNUC__ */
+#endif /* SLJIT_MIPS_REV >= 6 */
+#else /* !SLJIT_CONFIG_MIPS_32 */
+		/* StatusFR=1 is the only mode supported by the code in MIPS64 */
+		feature_list |= CPU_FEATURE_FR;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+	}
+
+	cpu_feature_list = feature_list;
+}
+
/* dest_reg is the absolute name of the register
Useful for reordering instructions in the delay slot. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot)
@@ -715,21 +851,23 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
-#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE)
- sljit_sw fir = 0;
-#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */
-
switch (feature_type) {
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ && (!defined(SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE)
+ case SLJIT_HAS_F64_AS_F32_PAIR:
+ if (!cpu_feature_list)
+ get_cpu_features();
+
+ return (cpu_feature_list & CPU_FEATURE_FR) != 0;
+#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_IS_FPU_AVAILABLE */
case SLJIT_HAS_FPU:
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#elif defined(__GNUC__)
- __asm__ ("cfc1 %0, $0" : "=r"(fir));
- return (fir >> 22) & 0x1;
-#else
-#error "FIR check is not implemented for this architecture"
-#endif
+ if (!cpu_feature_list)
+ get_cpu_features();
+
+ return (cpu_feature_list & CPU_FEATURE_FPU) != 0;
case SLJIT_HAS_ZERO_REGISTER:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
return 1;
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
case SLJIT_HAS_CLZ:
@@ -741,6 +879,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 2;
#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
return 1;
#endif /* SLJIT_MIPS_REV >= 2 */
@@ -751,7 +890,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
+ SLJIT_UNUSED_ARG(type);
+ return 0;
}
/* --------------------------------------------------------------------- */
@@ -792,6 +932,12 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr);
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SELECT_OP(a, b) (b)
+#else
+#define SELECT_OP(a, b) (!(op & SLJIT_32) ? a : b)
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#include "sljitNativeMIPS_32.c"
#else
#include "sljitNativeMIPS_64.c"
@@ -815,12 +961,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
if ((local_size & SSIZE_OF(sw)) != 0)
local_size += SSIZE_OF(sw);
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
}
local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
#else
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f;
#endif
compiler->local_size = local_size;
@@ -918,10 +1064,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (word_arg_count == 0 && float_arg_count <= 2) {
if (float_arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
} else if (arg_count < 4) {
FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MTHC1 | TA(5 + arg_count) | FS(float_arg_count), MOVABLE_INS));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS));
+ break;
+ }
} else
FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS));
arg_count++;
@@ -931,7 +1086,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (word_arg_count == 0 && float_arg_count <= 2) {
if (float_arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
} else if (arg_count < 4)
FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS));
else
@@ -966,16 +1121,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
case SLJIT_ARG_TYPE_F64:
float_arg_count++;
if (arg_count != float_arg_count)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
else if (arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
break;
case SLJIT_ARG_TYPE_F32:
float_arg_count++;
if (arg_count != float_arg_count)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
else if (arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
break;
default:
word_arg_count++;
@@ -1011,12 +1166,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
if ((local_size & SSIZE_OF(sw)) != 0)
local_size += SSIZE_OF(sw);
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
}
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
#else
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f;
#endif
return SLJIT_SUCCESS;
@@ -1042,10 +1197,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
if ((tmp & SSIZE_OF(sw)) != 0)
tmp += SSIZE_OF(sw);
- tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
}
#else
- tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
#endif
if (local_size <= SIMM_MAX) {
@@ -1138,7 +1293,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c
FAIL_IF(emit_stack_frame_release(compiler, 1, &ins));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS));
return push_inst(compiler, ins, UNMOVABLE_INS);
}
@@ -1388,16 +1543,12 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define SELECT_OP(a, b) (b)
-
#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \
op_imm = (imm); \
op_v = (v);
#else /* !SLJIT_CONFIG_MIPS_32 */
-#define SELECT_OP(a, b) \
- (!(op & SLJIT_32) ? a : b)
#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \
op_dimm = (dimm); \
@@ -1414,10 +1565,10 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
{
sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- sljit_ins max = (op & SLJIT_32) ? 32 : 64;
-#else /* !SLJIT_CONFIG_RISCV_64 */
- sljit_ins max = 32;
-#endif /* SLJIT_CONFIG_RISCV_64 */
+ sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
+#else /* !SLJIT_CONFIG_MIPS_64 */
+ sljit_ins word_size = 32;
+#endif /* SLJIT_CONFIG_MIPS_64 */
/* The TMP_REG2 is the next value. */
if (src != TMP_REG2)
@@ -1425,7 +1576,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(is_clz ? 13 : 14), UNMOVABLE_INS));
/* The OTHER_FLAG is the counter. Delay slot. */
- FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(max), OTHER_FLAG));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(word_size), OTHER_FLAG));
if (!is_clz) {
FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG2) | T(TMP_REG1) | IMM(1), DR(TMP_REG1)));
@@ -1437,7 +1588,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(0), OTHER_FLAG));
/* The TMP_REG1 is the next shift. */
- FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(max), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(word_size), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(TMP_REG2) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1)));
@@ -1459,6 +1610,104 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
#endif /* SLJIT_MIPS_REV < 1 */
+static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
+{ /* Emits a byte-order reversal of register src into register dst.
+ * On 64-bit builds a SLJIT_REV without SLJIT_32 reverses all eight bytes;
+ * every other case reverses the low four bytes. Release 2 and later use
+ * the dedicated byte swap instructions, older revisions fall back to
+ * shift-and-mask sequences that use TMP_REG1, OTHER_FLAG and EQUAL_FLAG
+ * as scratch. Returns SLJIT_SUCCESS, or leaves early when push_inst fails.
+ */
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ int is_32 = (op & SLJIT_32);
+#endif /* SLJIT_CONFIG_MIPS_64 */
+
+ op = GET_OPCODE(op);
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ if (!is_32 && (op == SLJIT_REV)) { /* full 64-bit reversal: DSBH followed by DSHD */
+ FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst)));
+ return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst));
+ }
+ if (op != SLJIT_REV && src != TMP_REG2) { /* copy src through SLL with shift amount 0 first;
+ * presumably to normalize the 32-bit value - TODO confirm */
+ FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1)));
+ src = TMP_REG1;
+ }
+#endif /* SLJIT_CONFIG_MIPS_64 */
+ FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); /* swap the bytes inside each halfword */
+ FAIL_IF(push_inst(compiler, ROTR | T(dst) | D(dst) | SH_IMM(16), DR(dst))); /* then swap the two halfwords */
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) /* unsigned result: insert zeros into the upper 32 bits;
+ * skipped for temporaries, presumably because only 32 bits are stored - TODO confirm */
+ FAIL_IF(push_inst(compiler, DINSU | T(dst) | SA(0) | (31 << 11), DR(dst)));
+#endif /* SLJIT_CONFIG_MIPS_64 */
+#else /* SLJIT_MIPS_REV < 2 */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (!is_32) { /* 64-bit fallback: swap words, then halfwords, then bytes */
+ FAIL_IF(push_inst(compiler, DSRL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); /* TMP_REG1 = src >> 32 */
+ FAIL_IF(push_inst(compiler, ORI | SA(0) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); /* mask = 0xffff */
+ FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(dst) | SH_IMM(0), DR(dst))); /* dst = src << 32 */
+ FAIL_IF(push_inst(compiler, DSLL32 | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(0), OTHER_FLAG)); /* mask = 0x0000ffff00000000 */
+ FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); /* words swapped */
+
+ FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); /* mask = 0x0000ffff0000ffff */
+ FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, DSLL | TA(OTHER_FLAG) | DA(EQUAL_FLAG) | SH_IMM(8), EQUAL_FLAG)); /* EQUAL_FLAG = mask << 8 */
+ FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
+ FAIL_IF(push_inst(compiler, XOR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); /* mask = 0x00ff00ff00ff00ff for the byte step */
+ FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); /* halfwords swapped */
+
+ FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(8), DR(dst)));
+ return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); /* bytes swapped */
+ }
+
+ if (op != SLJIT_REV && src != TMP_REG2) { /* same SLL-by-0 copy as the release 2 path, but into TMP_REG2 */
+ FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(0), DR(TMP_REG2)));
+ src = TMP_REG2;
+ }
+#endif /* SLJIT_CONFIG_MIPS_64 */
+
+ FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1))); /* 32-bit fallback: swap halfwords, then bytes */
+ FAIL_IF(push_inst(compiler, LUI | TA(OTHER_FLAG) | 0xff, OTHER_FLAG)); /* mask = 0x00ff0000 */
+ FAIL_IF(push_inst(compiler, SLL | T(src) | D(dst) | SH_IMM(16), DR(dst)));
+ FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xff, OTHER_FLAG)); /* mask = 0x00ff00ff */
+ FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); /* halfwords swapped */
+
+ FAIL_IF(push_inst(compiler, SRL | T(dst) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SLL | T(dst) | D(dst) | SH_IMM(8), DR(dst)));
+ FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); /* bytes swapped */
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) { /* unsigned result: clear the upper 32 bits with a shift pair */
+ FAIL_IF(push_inst(compiler, DSLL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)));
+ FAIL_IF(push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)));
+ }
+#endif /* SLJIT_CONFIG_MIPS_64 */
+#endif /* SLJIT_MIPS_REV >= 2 */
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
+{ /* Emits a swap of the two low bytes of register src into register dst.
+ * SLJIT_REV_U16 produces a zero-extended result; any other opcode takes
+ * the sign-extending path. Returns the result of the last push_inst, or
+ * leaves early when an earlier push_inst fails.
+ */
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32
+ FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); /* swap the bytes inside each halfword */
+#else /* !SLJIT_CONFIG_MIPS_32 */
+ FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); /* 64-bit counterpart of WSBH */
+#endif /* SLJIT_CONFIG_MIPS_32 */
+ if (GET_OPCODE(op) == SLJIT_REV_U16)
+ return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst)); /* keep the low 16 bits, zero-extended */
+ else
+ return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst)); /* sign-extend the low halfword */
+#else /* SLJIT_MIPS_REV < 2 */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); /* TMP_REG1 = src >> 8 */
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst))); /* move the low byte of src to the top byte of dst */
+ FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG1) | T(TMP_REG1) | 0xff, DR(TMP_REG1))); /* TMP_REG1 = old second byte, now in bits 0-7 */
+ FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SELECT_OP(DSRL32, SRL) : SELECT_OP(DSRA32, SRA)) | T(dst) | D(dst) | SH_IMM(16), DR(dst))); /* bring it down to bits 8-15: logical shift for U16, arithmetic otherwise */
+ return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); /* combine both bytes */
+#endif /* SLJIT_MIPS_REV >= 2 */
+}
+
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
@@ -1486,17 +1735,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#else /* SLJIT_MIPS_REV < 1 */
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
#else /* !SLJIT_CONFIG_MIPS_32 */
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (op & SLJIT_32)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
#endif /* SLJIT_CONFIG_MIPS_32 */
@@ -1515,17 +1764,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#else /* SLJIT_MIPS_REV < 1 */
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
#else /* !SLJIT_CONFIG_MIPS_32 */
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (op & SLJIT_32)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
#endif /* SLJIT_CONFIG_MIPS_32 */
@@ -1539,7 +1788,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (dst == src2)
- return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst));
+ return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11), DR(dst));
#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
@@ -1556,14 +1805,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
#endif /* SLJIT_CONFIG_MIPS_64 */
- case SLJIT_NOT:
- SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
- if (op & SLJIT_SET_Z)
- FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (!(flags & UNUSED_DEST))
- FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
- return SLJIT_SUCCESS;
-
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
@@ -1591,10 +1832,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return emit_clz_ctz(compiler, op, dst, src2);
#endif /* SLJIT_MIPS_REV >= 1 */
+ case SLJIT_REV:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && src2 != TMP_REG1 && dst != TMP_REG1);
+ return emit_rev(compiler, op, dst, src2);
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+ return emit_rev16(compiler, op, dst, src2);
+
case SLJIT_ADD:
/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
- carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ carry_src_ar = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_overflow) {
@@ -1650,7 +1902,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_ADDC:
- carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ carry_src_ar = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
@@ -1697,11 +1949,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
is_handled = 0;
if (flags & SRC2_IMM) {
- if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
- else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
@@ -1718,19 +1970,15 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
switch (GET_FLAG_TYPE(op)) {
case SLJIT_LESS:
- case SLJIT_GREATER_EQUAL:
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
break;
case SLJIT_GREATER:
- case SLJIT_LESS_EQUAL:
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
break;
case SLJIT_SIG_LESS:
- case SLJIT_SIG_GREATER_EQUAL:
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
break;
case SLJIT_SIG_GREATER:
- case SLJIT_SIG_LESS_EQUAL:
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
break;
}
@@ -1753,7 +2001,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
- is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_overflow) {
@@ -1802,7 +2050,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
flags &= ~SRC2_IMM;
}
- is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_carry)
@@ -1868,6 +2116,14 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
case SLJIT_XOR:
+ if (!(flags & LOGICAL_OP)) {
+ SLJIT_ASSERT((flags & SRC2_IMM) && src2 == -1);
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ if (!(flags & UNUSED_DEST))
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | D(dst), DR(dst)));
+ return SLJIT_SUCCESS;
+ }
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
@@ -2034,9 +2290,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
compiler->cache_argw = 0;
}
- if (dst == TMP_REG2) {
+ if (dst == 0) {
SLJIT_ASSERT(HAS_FLAGS(op));
flags |= UNUSED_DEST;
+ dst = TMP_REG2;
}
else if (FAST_IS_REG(dst)) {
dst_r = dst;
@@ -2048,10 +2305,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
flags |= SLOW_DEST;
if (flags & IMM_OP) {
- if ((src2 & SLJIT_IMM) && src2w != 0 && CHECK_IMM(flags, src2w)) {
+ if (src2 == SLJIT_IMM && src2w != 0 && CHECK_IMM(flags, src2w)) {
flags |= SRC2_IMM;
src2_r = src2w;
- } else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && CHECK_IMM(flags, src1w)) {
+ } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && CHECK_IMM(flags, src1w)) {
flags |= SRC2_IMM;
src2_r = src1w;
@@ -2068,7 +2325,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
if (src1w) {
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
src1_r = TMP_REG1;
@@ -2091,7 +2348,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
dst_r = (sljit_s32)src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
if (!(flags & SRC2_IMM)) {
if (src2w) {
FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
@@ -2279,31 +2536,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
case SLJIT_MOV_U32:
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
case SLJIT_MOV_S32:
case SLJIT_MOV32:
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
#endif
case SLJIT_MOV_U8:
- return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
- case SLJIT_NOT:
- return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+ return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
+ case SLJIT_REV:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
}
SLJIT_UNREACHABLE();
@@ -2326,9 +2589,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (op & SLJIT_32) {
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)src1w;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)src2w;
}
#endif
@@ -2348,9 +2611,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->status_flags_state = 0;
return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+ case SLJIT_XOR:
+ if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
+ return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+ }
+ /* fallthrough */
case SLJIT_AND:
case SLJIT_OR:
- case SLJIT_XOR:
return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
@@ -2362,10 +2629,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_ROTL:
case SLJIT_ROTR:
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w &= 0x1f;
#else
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (op & SLJIT_32)
src2w &= 0x1f;
else
@@ -2387,7 +2654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
+ return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
}
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
@@ -2399,9 +2666,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
#endif /* SLJIT_CONFIG_MIPS_64 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_s32 is_left;
sljit_ins ins1, ins2, ins3;
@@ -2414,50 +2682,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
#endif /* SLJIT_CONFIG_MIPS_64 */
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src2 & SLJIT_IMM) {
- src2w &= bit_length - 1;
+ if (src3 == SLJIT_IMM) {
+ src3w &= bit_length - 1;
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src2, src2w));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
- src1 = TMP_REG1;
- }
-
- if (src2 & SLJIT_IMM) {
if (is_left) {
- ins1 = SELECT_OP3(op, src2w, DSLL, DSLL32, SLL);
- src2w = bit_length - src2w;
- ins2 = SELECT_OP3(op, src2w, DSRL, DSRL32, SRL);
+ ins1 = SELECT_OP3(op, src3w, DSLL, DSLL32, SLL);
+ src3w = bit_length - src3w;
+ ins2 = SELECT_OP3(op, src3w, DSRL, DSRL32, SRL);
} else {
- ins1 = SELECT_OP3(op, src2w, DSRL, DSRL32, SRL);
- src2w = bit_length - src2w;
- ins2 = SELECT_OP3(op, src2w, DSLL, DSLL32, SLL);
+ ins1 = SELECT_OP3(op, src3w, DSRL, DSRL32, SRL);
+ src3w = bit_length - src3w;
+ ins2 = SELECT_OP3(op, src3w, DSLL, DSLL32, SLL);
}
- FAIL_IF(push_inst(compiler, ins1 | T(src_dst) | D(src_dst), DR(src_dst)));
- FAIL_IF(push_inst(compiler, ins2 | T(src1) | D(TMP_REG1), DR(TMP_REG1)));
- return push_inst(compiler, OR | S(src_dst) | T(TMP_REG1) | D(src_dst), DR(src_dst));
+ FAIL_IF(push_inst(compiler, ins1 | T(src1_reg) | D(dst_reg), DR(dst_reg)));
+ FAIL_IF(push_inst(compiler, ins2 | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1)));
+ return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg));
+ }
+
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src3, src3w));
+ src3 = TMP_REG2;
+ } else if (dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, SELECT_OP2(op, DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+ src3 = TMP_REG2;
}
if (is_left) {
@@ -2470,17 +2732,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
ins3 = SELECT_OP2(op, DSLLV, SLLV);
}
- FAIL_IF(push_inst(compiler, ins2 | S(src2) | T(src_dst) | D(src_dst), DR(src_dst)));
+ FAIL_IF(push_inst(compiler, ins2 | S(src3) | T(src1_reg) | D(dst_reg), DR(dst_reg)));
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
- FAIL_IF(push_inst(compiler, ins1 | T(src1) | D(TMP_REG1) | (1 << 6), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XORI | S(src2) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2)));
- src1 = TMP_REG1;
+ FAIL_IF(push_inst(compiler, ins1 | T(src2_reg) | D(TMP_REG1) | (1 << 6), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XORI | S(src3) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2)));
+ src2_reg = TMP_REG1;
} else
- FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG2), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src1) | D(TMP_REG1), DR(TMP_REG1)));
- return push_inst(compiler, OR | S(src_dst) | T(TMP_REG1) | D(src_dst), DR(src_dst));
+ FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1)));
+ return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg));
}
#undef SELECT_OP3
@@ -2518,21 +2780,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 dst_ar = RETURN_ADDR_REG;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS);
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_ar = DR(FAST_IS_REG(dst) ? dst : TMP_REG2);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_ar, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
+ break;
+ }
+
+ if (dst & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw));
+
+ if (op == SLJIT_FAST_ENTER)
+ compiler->delay_slot = UNMOVABLE_INS;
+ }
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return reg_map[reg];
+
+ if (type != SLJIT_FLOAT_REGISTER)
+ return -1;
+
return FR(reg);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -2544,14 +2839,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* --------------------------------------------------------------------- */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
-#define FMT(op) ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) << (21 - 8))
+#define FMT(op) (FMT_S | (~(sljit_ins)op & SLJIT_32) << (21 - (5 + 3)))
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# define flags (sljit_u32)0
+ sljit_u32 flags = 0;
#else
sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21;
#endif
@@ -2565,18 +2860,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (FAST_IS_REG(dst)) {
FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
- /* Store the integer value from a VFP register. */
return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0);
-
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# undef flags
-#endif
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2584,43 +2874,158 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# define flags (sljit_u32)0
+ sljit_u32 flags = 0;
#else
sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21;
#endif
-
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- if (FAST_IS_REG(src)) {
- FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- } else if (src & SLJIT_MEM) {
- /* Load the integer value into a VFP register. */
+ if (src & SLJIT_MEM)
FAIL_IF(emit_op_mem2(compiler, (flags ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw));
- }
else {
+ if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- srcw = (sljit_s32)srcw;
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ srcw = (sljit_s32)srcw;
#endif
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
- FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+#endif /* MIPS III */
}
- FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
if (dst & SLJIT_MEM)
return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0);
return SLJIT_SUCCESS;
+}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# undef flags
+ sljit_u32 flags = 0;
+#else
+ sljit_u32 flags = 1 << 21;
+#endif
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW ? WORD_DATA : INT_DATA) | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
+ src = TMP_REG1;
+ } else if (src == SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
+ srcw = (sljit_u32)srcw;
#endif
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ src = TMP_REG1;
+ }
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
+ if (src != TMP_REG1) {
+ FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, DSRL32 | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1)));
+ }
+
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV)
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
+ }
+#else /* !SLJIT_CONFIG_MIPS_64 */
+ if (!(op & SLJIT_32)) {
+ FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2)));
+
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG2) | FS(TMP_FREG1), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | 1 | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+ FAIL_IF(push_inst(compiler, BGEZ | S(src) | 5, UNMOVABLE_INS));
+#else /* SLJIT_MIPS_REV >= 2 */
+ FAIL_IF(push_inst(compiler, BGEZ | S(src) | 4, UNMOVABLE_INS));
+#endif /* SLJIT_MIPS_REV <= 1 */
+
+ FAIL_IF(push_inst(compiler, LUI | T(TMP_REG2) | IMM(0x41e0), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MTC1 | TA(0) | FS(TMP_FREG2), UNMOVABLE_INS));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(TMP_FREG2), UNMOVABLE_INS));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(TMP_FREG2) | (1 << 11), UNMOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ break;
+ }
+ FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(TMP_FREG2) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
+ }
+#endif /* SLJIT_CONFIG_MIPS_64 */
+
+#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+ FAIL_IF(push_inst(compiler, BLTZ | S(src) | 5, UNMOVABLE_INS));
+#else /* SLJIT_MIPS_REV >= 2 */
+ FAIL_IF(push_inst(compiler, BLTZ | S(src) | 4, UNMOVABLE_INS));
+#endif /* SLJIT_MIPS_REV <= 1 */
+ FAIL_IF(push_inst(compiler, ANDI | S(src) | T(TMP_REG2) | IMM(1), DR(TMP_REG2)));
+
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV)
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* !SLJIT_MIPS_REV */
+
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+ FAIL_IF(push_inst(compiler, BEQ | 6, UNMOVABLE_INS));
+#else /* SLJIT_MIPS_REV >= 2 */
+ FAIL_IF(push_inst(compiler, BEQ | 5, UNMOVABLE_INS));
+#endif /* SLJIT_MIPS_REV <= 1 */
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ FAIL_IF(push_inst(compiler, DSRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1)));
+#else /* !SLJIT_CONFIG_MIPS_64 */
+ FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1)));
+#endif /* SLJIT_CONFIG_MIPS_64 */
+
+ FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1)));
+
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV)
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* !SLJIT_MIPS_REV */
+
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(dst_r) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2642,36 +3047,30 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
switch (GET_FLAG_TYPE(op)) {
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_UNORDERED_OR_NOT_EQUAL:
inst = C_EQ_S;
break;
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL:
inst = C_UEQ_S;
break;
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
- case SLJIT_UNORDERED_OR_GREATER_EQUAL:
inst = C_OLT_S;
break;
case SLJIT_F_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_LESS:
- case SLJIT_ORDERED_GREATER_EQUAL:
inst = C_ULT_S;
break;
case SLJIT_F_GREATER:
case SLJIT_ORDERED_GREATER:
- case SLJIT_UNORDERED_OR_LESS_EQUAL:
inst = C_ULE_S;
break;
case SLJIT_F_LESS_EQUAL:
case SLJIT_UNORDERED_OR_GREATER:
- case SLJIT_ORDERED_LESS_EQUAL:
inst = C_OLE_S;
break;
default:
- SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED || GET_FLAG_TYPE(op) == SLJIT_ORDERED);
+ SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED);
inst = C_UN_S;
break;
}
@@ -2705,7 +3104,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
case SLJIT_MOV_F64:
if (src != dst_r) {
if (dst_r != TMP_FREG1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS));
else
dst_r = src;
}
@@ -2786,18 +3185,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_ADD_F64:
FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
-
case SLJIT_SUB_F64:
FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
-
case SLJIT_MUL_F64:
FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
-
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
+ case SLJIT_COPYSIGN_F64:
+ return emit_copysign(compiler, op, src1, src2, dst_r);
}
if (dst_r == TMP_FREG2)
@@ -2806,26 +3204,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
-#undef FLOAT_DATA
-#undef FMT
-
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
{
+ union {
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
- if (FAST_IS_REG(dst))
- return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS);
+ u.value = value;
- /* Memory. */
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw));
- compiler->delay_slot = UNMOVABLE_INS;
- return SLJIT_SUCCESS;
+ if (u.imm == 0)
+ return push_inst(compiler, MTC1 | TA(0) | FS(freg), MOVABLE_INS);
+
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm));
+ return push_inst(compiler, MTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS);
}
/* --------------------------------------------------------------------- */
@@ -2984,7 +3380,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
}
#define RESOLVE_IMM1() \
- if (src1 & SLJIT_IMM) { \
+ if (src1 == SLJIT_IMM) { \
if (src1w) { \
PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \
src1 = TMP_REG1; \
@@ -2994,7 +3390,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
}
#define RESOLVE_IMM2() \
- if (src2 & SLJIT_IMM) { \
+ if (src2 == SLJIT_IMM) { \
if (src2w) { \
PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
src2 = TMP_REG2; \
@@ -3046,10 +3442,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2)))
jump->flags |= IS_MOVABLE;
PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS));
- }
- else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) {
+ } else if (type >= SLJIT_SIG_LESS && ((src1 == SLJIT_IMM && src1w == 0) || (src2 == SLJIT_IMM && src2w == 0))) {
inst = NOP;
- if ((src1 & SLJIT_IMM) && (src1w == 0)) {
+ if (src1 == SLJIT_IMM && src1w == 0) {
RESOLVE_IMM2();
switch (type) {
case SLJIT_SIG_LESS:
@@ -3097,7 +3492,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
else {
if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) {
RESOLVE_IMM1();
- if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
+ if (src2 == SLJIT_IMM && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1)));
else {
RESOLVE_IMM2();
@@ -3107,7 +3502,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
}
else {
RESOLVE_IMM2();
- if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
+ if (src1 == SLJIT_IMM && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1)));
else {
RESOLVE_IMM1();
@@ -3142,9 +3537,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
#undef BR_T
#undef BR_F
-#undef FLOAT_DATA
-#undef FMT
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump = NULL;
@@ -3152,7 +3544,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
@@ -3184,8 +3576,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
#endif
}
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
- return SLJIT_SUCCESS;
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
@@ -3287,50 +3678,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
-{
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
- sljit_ins ins;
-#endif /* SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6 */
-
- CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
-
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
-
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
-#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (type & SLJIT_32)
- srcw = (sljit_s32)srcw;
-#endif
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
- src = TMP_REG1;
- srcw = 0;
- }
+static sljit_ins get_select_cc(sljit_s32 type, sljit_s32 is_float)
+{
switch (type & ~SLJIT_32) {
case SLJIT_EQUAL:
- ins = MOVZ | TA(EQUAL_FLAG);
- break;
+ return (is_float ? MOVZ_S : MOVZ) | TA(EQUAL_FLAG);
case SLJIT_NOT_EQUAL:
- ins = MOVN | TA(EQUAL_FLAG);
- break;
+ return (is_float ? MOVN_S : MOVN) | TA(EQUAL_FLAG);
case SLJIT_LESS:
case SLJIT_GREATER:
case SLJIT_SIG_LESS:
case SLJIT_SIG_GREATER:
case SLJIT_OVERFLOW:
- ins = MOVN | TA(OTHER_FLAG);
- break;
+ case SLJIT_CARRY:
+ return (is_float ? MOVN_S : MOVN) | TA(OTHER_FLAG);
case SLJIT_GREATER_EQUAL:
case SLJIT_LESS_EQUAL:
case SLJIT_SIG_GREATER_EQUAL:
case SLJIT_SIG_LESS_EQUAL:
case SLJIT_NOT_OVERFLOW:
- ins = MOVZ | TA(OTHER_FLAG);
- break;
+ case SLJIT_NOT_CARRY:
+ return (is_float ? MOVZ_S : MOVZ) | TA(OTHER_FLAG);
case SLJIT_F_EQUAL:
case SLJIT_F_LESS:
case SLJIT_F_LESS_EQUAL:
@@ -3341,8 +3711,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
case SLJIT_UNORDERED_OR_LESS_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
case SLJIT_UNORDERED:
- ins = MOVT;
- break;
+ return is_float ? MOVT_S : MOVT;
case SLJIT_F_NOT_EQUAL:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_F_GREATER:
@@ -3353,21 +3722,159 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
case SLJIT_ORDERED_GREATER:
case SLJIT_UNORDERED_OR_GREATER:
case SLJIT_ORDERED:
- ins = MOVF;
- break;
+ return is_float ? MOVF_S : MOVF;
default:
- ins = MOVZ | TA(OTHER_FLAG);
SLJIT_UNREACHABLE();
- break;
+ return (is_float ? MOVZ_S : MOVZ) | TA(OTHER_FLAG);
}
+}
- return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg));
+#endif /* SLJIT_MIPS_REV >= 1 */
+/* Emits a conditional select into dst_reg: dst_reg starts out as src2_reg (moved if necessary) and src1 replaces it under the condition in type. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
+{
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
+ sljit_ins mov_ins = (type & SLJIT_32) ? ADDU : DADDU;
+#else /* !SLJIT_CONFIG_MIPS_64 */
+ sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
+ sljit_ins mov_ins = ADDU;
+#endif /* SLJIT_CONFIG_MIPS_64 */
+/* mov_ins is used below as a register move: an add with register number 0 (TA(0)) as second operand. */
+#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
+ struct sljit_label *label;
+ struct sljit_jump *jump;
+#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+/* SLJIT_MIPS_REV 1..5: src1 is brought into a register and a conditional move chosen by get_select_cc() is emitted. */
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w));
+ src1 = TMP_REG2;
+ } else if (src1 == SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w;
+#endif
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
+ src1 = TMP_REG1;
+ }
+/* If dst_reg already is src1, swap the operands and flip the condition (type ^ 0x1); otherwise preload dst_reg with src2_reg. */
+ if (dst_reg != src2_reg) {
+ if (dst_reg == src1) {
+ src1 = src2_reg;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg)));
+ }
+/* Conditionally overwrite dst_reg with src1. */
+ return push_inst(compiler, get_select_cc(type, 0) | S(src1) | D(dst_reg), DR(dst_reg));
+
+#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */
+ if (dst_reg != src2_reg) {
+ if (dst_reg == src1) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+ FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+/* dst_reg is overwritten below while src1 still addresses through it: the copy in TMP_REG2 replaces it in the base and offset register fields. */
+ if ((src1 & REG_MASK) == dst_reg)
+ src1 = (src1 & ~REG_MASK) | TMP_REG2;
+
+ if (OFFS_REG(src1) == dst_reg)
+ src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2);
+ }
+
+ FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg)));
+ }
+ }
+/* Branch based fallback: a jump with the flipped condition skips the src1 move emitted after it. */
+ SLJIT_SKIP_CHECKS(compiler);
+ jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
+ FAIL_IF(!jump);
+/* Load, materialize or move src1 into dst_reg depending on the operand kind. */
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, DR(dst_reg), src1, src1w));
+ } else if (src1 == SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w;
+#endif /* SLJIT_CONFIG_MIPS_64 */
+ FAIL_IF(load_immediate(compiler, DR(dst_reg), src1w));
+ } else
+ FAIL_IF(push_inst(compiler, mov_ins | S(src1) | TA(0) | D(dst_reg), DR(dst_reg)));
+/* The jump target is the label placed right after the move. */
+ SLJIT_SKIP_CHECKS(compiler);
+ label = sljit_emit_label(compiler);
+ FAIL_IF(!label);
+
+ sljit_set_label(jump, label);
+ return SLJIT_SUCCESS;
+#endif /* SLJIT_MIPS_REV >= 1 */
+}
+/* Emits a conditional floating point select: dst_freg starts out as src2_freg (moved if necessary) and src1 replaces it under the condition in type. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
+ struct sljit_label *label;
+ struct sljit_jump *jump;
+#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+/* If dst_freg already is src1, swap the operands and flip the condition (type ^ 0x1); otherwise preload dst_freg with src2_freg. */
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src2_freg) | FD(dst_freg), MOVABLE_INS));
+ }
+/* SLJIT_MIPS_REV 1..5: a memory src1 is loaded into TMP_FREG1, then the conditional move from get_select_cc(type, 1) is emitted. */
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG1), src1, src1w));
+ src1 = TMP_FREG1;
+ }
+
+ return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS);
#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+ SLJIT_SKIP_CHECKS(compiler); /* Branch based fallback: a jump with the flipped condition skips the src1 load/move below. */
+ jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
+ FAIL_IF(!jump);
+
+ if (src1 & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(dst_freg), src1, src1w));
+ else
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src1) | FD(dst_freg), MOVABLE_INS));
+/* The jump target is the label placed right after the load/move. */
+ SLJIT_SKIP_CHECKS(compiler);
+ label = sljit_emit_label(compiler);
+ FAIL_IF(!label);
+
+ sljit_set_label(jump, label);
+ return SLJIT_SUCCESS;
#endif /* SLJIT_MIPS_REV >= 1 */
}
+#undef FLOAT_DATA
+#undef FMT
+
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s16 max_offset)
{
sljit_s32 arg = *mem;
@@ -3410,21 +3917,33 @@ static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem
}
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) /* Displacements for the paired left/right access instructions; the big endian branch swaps the two ends. */
-#define MEM16_IMM_FIRST(memw) IMM((memw) + 1)
-#define MEM16_IMM_SECOND(memw) IMM(memw)
-#define MEMF64_FS_FIRST(freg) FS(freg)
-#define MEMF64_FS_SECOND(freg) (FS(freg) | ((sljit_ins)1 << 11))
+#define IMM_LEFT(memw) IMM((memw) + SSIZE_OF(sw) - 1) /* sljit_sw sized access (used with SDL/LDL): last byte here, first byte for IMM_RIGHT */
+#define IMM_RIGHT(memw) IMM(memw)
+#define IMM_32_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) /* 32 bit access (used with SWL/LWL) */
+#define IMM_32_RIGHT(memw) IMM(memw)
+#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) /* first 32 bit half of an f64 transferred as two words */
+#define IMM_F64_FIRST_RIGHT(memw) IMM(memw)
+#define IMM_F64_SECOND_LEFT(memw) IMM((memw) + SSIZE_OF(f64) - 1) /* second 32 bit half of the same f64 */
+#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32))
+#define IMM_16_FIRST(memw) IMM((memw) + 1) /* byte displacements of a 16 bit access done as two byte transfers */
+#define IMM_16_SECOND(memw) IMM(memw)
#else /* !SLJIT_LITTLE_ENDIAN */
-#define MEM16_IMM_FIRST(memw) IMM(memw)
-#define MEM16_IMM_SECOND(memw) IMM((memw) + 1)
-#define MEMF64_FS_FIRST(freg) (FS(freg) | ((sljit_ins)1 << 11))
-#define MEMF64_FS_SECOND(freg) FS(freg)
+#define IMM_LEFT(memw) IMM(memw)
+#define IMM_RIGHT(memw) IMM((memw) + SSIZE_OF(sw) - 1)
+#define IMM_32_LEFT(memw) IMM(memw)
+#define IMM_32_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32))
+#define IMM_F64_FIRST_RIGHT(memw) IMM((memw) + SSIZE_OF(f64) - 1)
+#define IMM_F64_SECOND_LEFT(memw) IMM(memw)
+#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_16_FIRST(memw) IMM(memw)
+#define IMM_16_SECOND(memw) IMM((memw) + 1)
#endif /* SLJIT_LITTLE_ENDIAN */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16))
+#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16))
#else /* !SLJIT_CONFIG_MIPS_32 */
-#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))
+#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))
#endif /* SLJIT_CONFIG_MIPS_32 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3461,10 +3980,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
ins_right = ((type & SLJIT_MEM_STORE) ? SDR : LDR) | S(mem);
#endif /* SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg))));
- FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM(memw + (SSIZE_OF(sw) - 1)), DR(REG_PAIR_FIRST(reg))));
- FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))));
- return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM((memw + 2 * SSIZE_OF(sw) - 1)), DR(REG_PAIR_SECOND(reg)));
+ FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM_LEFT(memw), DR(REG_PAIR_FIRST(reg))));
+ FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM_RIGHT(memw), DR(REG_PAIR_FIRST(reg))));
+ FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM_LEFT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))));
+ return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM_RIGHT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)));
}
#endif /* !(SLJIT_MIPS_REV >= 6) */
@@ -3505,8 +4024,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
if (type & SLJIT_MEM_STORE) {
FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), MOVABLE_INS));
- return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), MOVABLE_INS));
+ return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), MOVABLE_INS);
}
flags = BYTE_DATA | LOAD_DATA;
@@ -3514,15 +4033,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
if (op == SLJIT_MOV_S16)
flags |= SIGNED_DATA;
- FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), DR(reg)));
+ FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), DR(reg)));
FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2)));
return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg));
case SLJIT_MOV:
case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- if (type & SLJIT_MEM_UNALIGNED_32) {
+ if (type & SLJIT_MEM_ALIGNED_32) {
flags = WORD_DATA;
if (!(type & SLJIT_MEM_STORE))
flags |= LOAD_DATA;
@@ -3534,8 +4053,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2);
if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SDR | S(mem) | T(reg) | IMM(memw + 7), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SDR | S(mem) | T(reg) | IMM_RIGHT(memw), MOVABLE_INS);
}
if (mem == reg) {
@@ -3543,8 +4062,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
mem = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM(memw), DR(reg)));
- return push_inst(compiler, LDR | S(mem) | T(reg) | IMM(memw + 7), DR(reg));
+ FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM_LEFT(memw), DR(reg)));
+ return push_inst(compiler, LDR | S(mem) | T(reg) | IMM_RIGHT(memw), DR(reg));
#endif /* SLJIT_CONFIG_MIPS_32 */
}
@@ -3552,8 +4071,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2);
if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(reg) | IMM(memw + 3), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM_32_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), MOVABLE_INS);
}
if (mem == reg) {
@@ -3561,18 +4080,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
mem = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg)));
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM_32_LEFT(memw), DR(reg)));
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- return push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg));
+ return push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg));
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg)));
if (op != SLJIT_MOV_U32)
return SLJIT_SUCCESS;
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
- return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11) | (0 << 11), DR(reg));
-#else /* SLJIT_MIPS_REV < 1 */
+ return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11), DR(reg));
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)));
return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg));
#endif /* SLJIT_MIPS_REV >= 2 */
@@ -3595,77 +4114,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
if (type & SLJIT_MEM_STORE) {
if (type & SLJIT_32) {
FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS);
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), MOVABLE_INS);
}
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+ FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), MOVABLE_INS));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MFHC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg) | (1 << 11), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS));
+ break;
+ }
- FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), MOVABLE_INS);
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, MFC1 | (1 << 21) | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+ FAIL_IF(push_inst(compiler, DMFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS);
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), MOVABLE_INS);
#endif /* SLJIT_CONFIG_MIPS_32 */
}
if (type & SLJIT_32) {
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), MOVABLE_INS));
-
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
+
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), DR(TMP_REG2)));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ return push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS);
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg) | (1 << 11), MOVABLE_INS));
+ break;
+ }
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | (1 << 21) | T(TMP_REG2) | FS(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+ FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
#endif /* SLJIT_CONFIG_MIPS_32 */
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
#endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */
-#undef MEM16_IMM_FIRST
-#undef MEM16_IMM_SECOND
-#undef MEMF64_FS_FIRST
-#undef MEMF64_FS_SECOND
+#undef IMM_16_SECOND
+#undef IMM_16_FIRST
+#undef IMM_F64_SECOND_RIGHT
+#undef IMM_F64_SECOND_LEFT
+#undef IMM_F64_FIRST_RIGHT
+#undef IMM_F64_FIRST_LEFT
+#undef IMM_32_RIGHT
+#undef IMM_32_LEFT
+#undef IMM_RIGHT
+#undef IMM_LEFT
#undef MEM_CHECK_UNALIGNED
#undef TO_ARGW_HI
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
index 9449e4b9d7..2352fad5d4 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
@@ -85,10 +85,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return SLJIT_SUCCESS;
- case SLJIT_NOT:
- SLJIT_ASSERT(src1 == TMP_REG1);
- return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
-
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, CNTLZW | S(src2) | A(dst));
@@ -246,6 +242,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
+ if (flags & ALT_FORM4) {
+ SLJIT_ASSERT(src1 == TMP_REG1);
+ return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+ }
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_SHL:
@@ -325,6 +325,151 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ sljit_s32 invert_sign = 1;
+/* Emits a signed word to floating point conversion that goes through a temporary stack slot (TMP_MEM_OFFSET); the result ends up in dst_r and is stored if dst is memory. */
+ if (src == SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000));
+ src = TMP_REG1;
+ invert_sign = 0; /* the xor with 2^31 was already applied to the immediate above */
+ } else if (!FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ /* First, a special double precision floating point value is constructed:
+ (2^52 + (src xor (2^31)))
+ The upper 32 bits of this number is a constant (0x43300000, the encoding
+ of 2^52), and the lower 32 bits
+ is simply the value of the source argument. The xor 2^31 operation adds
+ 0x80000000 to the source argument, which moves it into the 0 - 0xffffffff
+ range. Finally we subtract 2^52 + 2^31 to get the converted value. */
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
+ if (invert_sign)
+ FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); /* low word of the constant to subtract: 0x80000000 */
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); /* only the low word changes, the high word is reused */
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+
+ FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
+/* SLJIT_32 requests a single precision result: round dst_r in place with FRSP. */
+ if (op & SLJIT_32)
+ FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)));
+/* For a memory destination dst_r is TMP_FREG1, which is stored here. */
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+/* Emits an unsigned word to floating point conversion that goes through a temporary stack slot (TMP_MEM_OFFSET); the result ends up in dst_r and is stored if dst is memory. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+ if (src == SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ } else if (!FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ /* First, a special double precision floating point value is constructed:
+ (2^52 + src)
+ The upper 32 bits of this number is a constant (0x43300000, the encoding
+ of 2^52), and the lower 32 bits
+ is simply the value of the source argument. Finally we subtract 2^52
+ to get the converted value. */
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
+ FAIL_IF(push_inst(compiler, STW | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
+
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); /* low word replaced by TMP_ZERO (presumably a register holding zero), leaving the constant alone */
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+
+ FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
+/* SLJIT_32 requests a single precision result: round dst_r in place with FRSP. */
+ if (op & SLJIT_32)
+ FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)));
+/* For a memory destination dst_r is TMP_FREG1, which is stored here. */
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+/* Emits code that loads the f64 constant value into freg by writing its two 32 bit halves to the temporary stack slot and reading them back with LFD. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ union { /* Gives access to the two 32 bit words of the f64 constant in host memory order. */
+ sljit_s32 imm[2];
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+/* A zero word is not loaded into a register; TMP_ZERO is stored in its place below. */
+ if (u.imm[0] != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
+ if (u.imm[1] != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
+
+ /* Saved in the same endianness. */
+ FAIL_IF(push_inst(compiler, STW | S(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, STW | S(u.imm[1] != 0 ? TMP_REG2 : TMP_ZERO) | A(SLJIT_SP) | (TMP_MEM_OFFSET + sizeof(sljit_s32))));
+ return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+}
+/* Emits a raw bit copy between a floating point register and one general purpose register (or a register pair), bouncing the data through the temporary stack slot. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_s32 reg2 = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+/* 32 bit forms: one store and one load of the same slot, the direction depends on op. */
+ if (op & SLJIT_32) {
+ if (op == SLJIT_COPY32_TO_F32) {
+ FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, LFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+ }
+
+ FAIL_IF(push_inst(compiler, STFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+ }
+/* 64 bit forms: split a register pair; reg2 stays 0 when reg is a single register. */
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_SECOND(reg);
+ reg = REG_PAIR_FIRST(reg);
+ }
+/* reg (first of the pair) maps to the high word and reg2 to the low word of the f64. */
+ if (op == SLJIT_COPY_TO_F64) {
+ FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
+
+ if (reg2 != 0)
+ FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
+ else /* NOTE(review): STFD writes 8 bytes of freg starting at TMP_MEM_OFFSET_LO - confirm the intended layout for the single register case */
+ FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
+
+ return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+ }
+/* Remaining case: copy out of freg; the low word is only read back when a register pair was given. */
+ FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+
+ if (reg2 != 0)
+ FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
+
+ return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI);
+}
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins *)addr;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
index 80549108bf..b3cf9d074d 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
@@ -49,7 +49,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
- if (!(imm & ~0xffff))
+ if (((sljit_uw)imm >> 16) == 0)
return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
@@ -57,6 +57,11 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
}
+ if (((sljit_uw)imm >> 32) == 0) {
+ FAIL_IF(push_inst(compiler, ORIS | S(TMP_ZERO) | A(reg) | IMM(imm >> 16)));
+ return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
+ }
+
/* Count leading zeroes. */
tmp = (sljit_uw)((imm >= 0) ? imm : ~imm);
ASM_SLJIT_CLZ(tmp, shift);
@@ -198,11 +203,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return SLJIT_SUCCESS;
- case SLJIT_NOT:
- SLJIT_ASSERT(src1 == TMP_REG1);
- UN_EXTS();
- return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
-
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(src2) | A(dst));
@@ -399,6 +399,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
+ if (flags & ALT_FORM4) {
+ SLJIT_ASSERT(src1 == TMP_REG1);
+ UN_EXTS();
+ return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+ }
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_SHL:
@@ -563,6 +568,141 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{ /* Emits a signed integer -> floating point conversion.
+ * The integer bit pattern is brought into TMP_FREG1 (through the scratch
+ * slot at SLJIT_SP + TMP_MEM_OFFSET when it is in a register, or with
+ * a direct DOUBLE_DATA load when it is in memory), converted with
+ * FCFID into dst_r, and followed by FRSP when SLJIT_32 is set in op.
+ * A memory destination is stored last with emit_op_mem.
+ */
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; /* result register; TMP_FREG1 when dst is not a register */
+
+ if (src == SLJIT_IMM) {
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ srcw = (sljit_s32)srcw; /* keep the low 32 bits, sign extended back to sljit_sw */
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ } else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { /* 32 bit source: widen into TMP_REG1 first */
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
+ else
+ FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ if (FAST_IS_REG(src)) { /* register source: move it to TMP_FREG1 via the scratch slot */
+ FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ } else
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); /* word sized memory source: load it straight into TMP_FREG1 */
+
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+
+ if (op & SLJIT_32)
+ FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{ /* Emits an unsigned integer -> floating point conversion.
+ * SLJIT_CONV_F64_FROM_U32: the source is placed in TMP_REG1 as an
+ * unsigned 32 bit value (sljit_u32 cast, CLRLDI, or an INT_DATA load
+ * without SIGNED_DATA) and converted with a single FCFID.
+ * Otherwise: the emitted code compares src with zero and branches.
+ * One path converts src directly with FCFID; the other converts a value
+ * built from src shifted right by one bit and then adds the result to
+ * itself with FADD.
+ * NOTE(review): the second path presumably handles values whose top bit
+ * is set, which FCFID would treat as negative - confirm against the
+ * Power ISA.
+ * FRSP is appended when SLJIT_32 is set in op, and a memory destination
+ * is stored last with emit_op_mem.
+ */
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; /* result register; TMP_FREG1 when dst is not a register */
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
+ if (src == SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_u32)srcw));
+ src = TMP_REG1;
+ } else {
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, CLRLDI(TMP_REG1, src, 32)));
+ else
+ FAIL_IF(emit_op_mem(compiler, INT_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+ } else {
+ if (src == SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ } else if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(push_inst(compiler, CMPI | CRD(0 | 1) | A(src) | 0));
+ FAIL_IF(push_inst(compiler, BCx | (12 << 21) | (0 << 16) | 20)); /* displacement 20 skips the next four instructions, assuming 4-byte instructions; NOTE(review): presumably taken when src is negative as a signed value - confirm the BO/BI encoding */
+ FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+ FAIL_IF(push_inst(compiler, Bx | ((op & SLJIT_32) ? 36 : 32))); /* jumps past the FADD below; 36 instead of 32 accounts for the extra RLDICR emitted only with SLJIT_32 */
+
+ if (op & SLJIT_32) /* TMP_REG2 receives bits taken from src that are OR-ed back in after the shift */
+ FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(TMP_REG2) | RLWI_SH(10) | RLWI_MBE(10, 21)));
+ else
+ FAIL_IF(push_inst(compiler, ANDI | S(src) | A(TMP_REG2) | 0x1));
+
+ /* Shift right. */
+ FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(63) | RLDI_MB(1)));
+
+ if (op & SLJIT_32)
+ FAIL_IF(push_inst(compiler, RLDICR | S(TMP_REG1) | A(TMP_REG1) | RLDI_SH(0) | RLDI_ME(53)));
+
+ FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
+
+ FAIL_IF(push_inst(compiler, STD | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+ FAIL_IF(push_inst(compiler, FADD | FD(dst_r) | FA(dst_r) | FB(dst_r))); /* dst_r + dst_r: doubles the converted value */
+ }
+
+ if (op & SLJIT_32)
+ FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{ /* Emits code that sets the float register freg to the constant value.
+ * The bit pattern of value is read through a union as an sljit_sw,
+ * placed in TMP_REG1 with load_immediate, stored to the scratch slot
+ * at SLJIT_SP + TMP_MEM_OFFSET with STD and reloaded into freg with
+ * LFD. An all zero pattern skips load_immediate and stores TMP_ZERO.
+ * The return value is the status of the final push_inst call.
+ */
+ union {
+ sljit_sw imm;
+ sljit_f64 value;
+ } u; /* reinterprets the double as an integer of the same size */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
+
+ FAIL_IF(push_inst(compiler, STD | S(u.imm != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{ /* Emits a raw bit copy between the general purpose register reg and the
+ * float register freg, as a store followed by a load through the
+ * scratch slot at SLJIT_SP + TMP_MEM_OFFSET.
+ * SLJIT_32 in op selects the word sized pair (STW/LFS or STFS/LWZ)
+ * instead of the doubleword pair (STD/LFD or STFD/LD).
+ * The return value is the status of the final push_inst call.
+ */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) { /* register -> float; NOTE(review): presumably also matches the 32 bit variant once GET_OPCODE drops SLJIT_32 - confirm */
+ FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STW : STD) | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, ((op & SLJIT_32) ? LFS : LFD) | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+ }
+
+ FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STFS : STFD) | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); /* float -> register */
+ return push_inst(compiler, ((op & SLJIT_32) ? LWZ : LD) | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+}
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins*)addr;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
index f387114733..54977f02e3 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
@@ -132,7 +132,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
OE and Rc flag (see ALT_SET_FLAGS). */
#define OE(flags) ((flags) & ALT_SET_FLAGS)
/* Rc flag (see ALT_SET_FLAGS). */
-#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10)
+#define RC(flags) ((sljit_ins)((flags) & ALT_SET_FLAGS) >> 10)
#define HI(opcode) ((sljit_ins)(opcode) << 26)
#define LO(opcode) ((sljit_ins)(opcode) << 1)
@@ -150,6 +150,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define BCx (HI(16))
#define BCCTR (HI(19) | LO(528) | (3 << 11))
#define BLR (HI(19) | LO(16) | (0x14 << 21))
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+#define BRD (HI(31) | LO(187))
+#endif /* POWER10 */
#define CNTLZD (HI(31) | LO(58))
#define CNTLZW (HI(31) | LO(26))
#define CMP (HI(31) | LO(0))
@@ -183,6 +186,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUBS (HI(59) | LO(20))
#define LD (HI(58) | 0)
#define LFD (HI(50))
+#define LFS (HI(48))
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+#define LDBRX (HI(31) | LO(532))
+#endif /* POWER7 */
+#define LHBRX (HI(31) | LO(790))
+#define LWBRX (HI(31) | LO(534))
#define LWZ (HI(32))
#define MFCR (HI(31) | LO(19))
#define MFLR (HI(31) | LO(339) | 0x80000)
@@ -219,11 +228,17 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SRD (HI(31) | LO(539))
#define SRW (HI(31) | LO(536))
#define STD (HI(62) | 0)
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+#define STDBRX (HI(31) | LO(660))
+#endif /* POWER7 */
#define STDU (HI(62) | 1)
#define STDUX (HI(31) | LO(181))
#define STFD (HI(54))
#define STFIWX (HI(31) | LO(983))
+#define STFS (HI(52))
+#define STHBRX (HI(31) | LO(918))
#define STW (HI(36))
+#define STWBRX (HI(31) | LO(662))
#define STWU (HI(37))
#define STWUX (HI(31) | LO(183))
#define SUBF (HI(31) | LO(40))
@@ -253,10 +268,24 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define SLWI_W(shift) SLWI(shift)
+#define TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
#else /* !SLJIT_CONFIG_PPC_32 */
#define SLWI_W(shift) SLDI(shift)
+#define TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#endif /* SLJIT_CONFIG_PPC_32 */
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET)
+#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32))
+#define LWBRX_FIRST_REG S(TMP_REG1)
+#define LWBRX_SECOND_REG S(dst)
+#else /* !SLJIT_LITTLE_ENDIAN */
+#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET + sizeof(sljit_s32))
+#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET)
+#define LWBRX_FIRST_REG S(dst)
+#define LWBRX_SECOND_REG S(TMP_REG1)
+#endif /* SLJIT_LITTLE_ENDIAN */
+
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func)
{
@@ -423,6 +452,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
reverse_buf(compiler);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+ /* add to compiler->size additional instruction space to hold the trampoline and padding */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
@@ -623,7 +653,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
- compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
@@ -641,8 +670,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+ compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins) + sizeof(struct sljit_function_context);
+
return code_ptr;
#else
+ compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
+
return code;
#endif
}
@@ -652,12 +685,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
-
+ case SLJIT_HAS_REV:
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+ return 1;
+#else /* !POWER10 */
+ return 2;
+#endif /* POWER10 */
/* A saved register is set to a zero value. */
case SLJIT_HAS_ZERO_REGISTER:
case SLJIT_HAS_CLZ:
@@ -675,7 +713,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
+ switch (type) { /* replaces the removed range test: 1 is returned only for the six comparison types listed here */
+ case SLJIT_UNORDERED_OR_EQUAL:
+ case SLJIT_ORDERED_NOT_EQUAL:
+ case SLJIT_UNORDERED_OR_LESS:
+ case SLJIT_ORDERED_GREATER_EQUAL:
+ case SLJIT_UNORDERED_OR_GREATER:
+ case SLJIT_ORDERED_LESS_EQUAL:
+ return 1;
+ }
+
+ return 0; /* every other type */
}
/* --------------------------------------------------------------------- */
@@ -699,6 +747,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
#define MEM_MASK 0x7f
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6))
+
/* Other inp_flags. */
/* Integer operation and set flags -> requires exts on 64 bit systems. */
@@ -722,6 +772,9 @@ ALT_FORM1 0x001000
...
ALT_FORM5 0x010000 */
+static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
+
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#include "sljitNativePPC_32.c"
#else
@@ -737,16 +790,13 @@ ALT_FORM5 0x010000 */
#endif
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
-#define LR_SAVE_OFFSET 2 * SSIZE_OF(sw)
+#define LR_SAVE_OFFSET (2 * SSIZE_OF(sw))
#else
#define LR_SAVE_OFFSET SSIZE_OF(sw)
#endif
#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET)
-static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
- sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@@ -763,7 +813,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0)
- + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
if (!(options & SLJIT_ENTER_REG_ARG))
local_size += SSIZE_OF(sw);
@@ -873,7 +923,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0)
- + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
if (!(options & SLJIT_ENTER_REG_ARG))
local_size += SSIZE_OF(sw);
@@ -1222,7 +1272,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
src1_r = TMP_ZERO;
if (src1w != 0) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
@@ -1242,7 +1292,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
dst_r = src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
src2_r = TMP_ZERO;
if (src2w != 0) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
@@ -1312,29 +1362,161 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
-static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
- sljit_s32 src, sljit_sw srcw)
+static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
{
- if (!(src & OFFS_REG_MASK)) {
- if (srcw == 0 && (src & REG_MASK))
- return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
+ sljit_s32 mem, offs_reg, inp_flags; /* emit_rev: emits code for the SLJIT_REV* operations.
+ * Register to register: 16 bit forms use RLWINM / RLWIMI pairs; wider
+ * forms go through the scratch slot at SLJIT_SP + TMP_MEM_OFFSET with
+ * STWBRX stores and a plain reload (or a single BRD on POWER10).
+ * With a memory operand: the address is reduced to the pair
+ * (mem, offs_reg) and one of the LHBRX / LWBRX / LDBRX loads or
+ * STHBRX / STWBRX / STDBRX stores is emitted; a memory to memory
+ * operation first loads the source into TMP_REG1.
+ * The S16 / S32 variants append EXTSH / EXTSW after a load.
+ */
+ sljit_sw memw;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ sljit_s32 is_32 = op & SLJIT_32;
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- /* Works with SLJIT_MEM0() case as well. */
- return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+ op = GET_OPCODE(op);
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+ if (!((dst | src) & SLJIT_MEM)) {
+ /* Both are registers. */
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { /* 16 bit forms need no memory access */
+ if (src == dst) {
+ FAIL_IF(push_inst(compiler, RLWIMI | S(dst) | A(dst) | RLWI_SH(16) | RLWI_MBE(8, 15)));
+ FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | RLWI_SH(24) | RLWI_MBE(16, 31)));
+ } else {
+ FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(dst) | RLWI_SH(8) | RLWI_MBE(16, 23)));
+ FAIL_IF(push_inst(compiler, RLWIMI | S(src) | A(dst) | RLWI_SH(24) | RLWI_MBE(24, 31)));
+ }
+
+ if (op == SLJIT_REV_U16)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, EXTSH | S(dst) | A(dst));
+ }
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (!is_32) {
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+ return push_inst(compiler, BRD | S(src) | A(dst));
+#else /* !POWER10 */
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_HI))); /* no BRD: store both 32 bit halves with STWBRX into the scratch slot, then reload the whole slot with LD */
+ FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32)));
+ FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LO)));
+ FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(SLJIT_SP) | B(TMP_REG2)));
+ return push_inst(compiler, LD | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+#endif /* POWER10 */
+ }
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET))); /* 32 bit form: STWBRX into the scratch slot, reload with LWZ */
+ FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWZ | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (op == SLJIT_REV_S32)
+ return push_inst(compiler, EXTSW | S(dst) | A(dst));
+#endif /* SLJIT_CONFIG_PPC_64 */
+ return SLJIT_SUCCESS;
}
- srcw &= 0x3;
+ mem = src; /* at least one operand is in memory from here on; start with the source */
+ memw = srcw;
- if (srcw == 0)
- return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
+ if (dst & SLJIT_MEM) { /* a memory destination takes over as the addressed operand */
+ mem = dst;
+ memw = dstw;
- FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1)));
- return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+ if (src & SLJIT_MEM) { /* memory to memory: load the source into TMP_REG1 first */
+ inp_flags = HALF_DATA | LOAD_DATA;
+
+ if (op != SLJIT_REV_U16 && op != SLJIT_REV_S16) {
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ inp_flags = (is_32 ? INT_DATA : WORD_DATA) | LOAD_DATA;
+#else /* !SLJIT_CONFIG_PPC_64 */
+ inp_flags = WORD_DATA | LOAD_DATA;
+#endif /* SLJIT_CONFIG_PPC_64 */
+ }
+
+ FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src, srcw, TMP_REG2));
+ src = TMP_REG1;
+ }
+ }
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { /* base register plus index register, with the index optionally shifted left by memw & 0x3 */
+ offs_reg = OFFS_REG(mem);
+ mem &= REG_MASK;
+ memw &= 0x3;
+
+ if (memw != 0) {
+ FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(offs_reg) | A(TMP_REG2)));
+ offs_reg = TMP_REG2;
+ }
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ } else if (memw > 0x7fff7fffl || memw < -0x80000000l) { /* offset outside the range handled by the ADDI / ADDIS pair below: load it as an immediate */
+ FAIL_IF(load_immediate(compiler, TMP_REG2, memw));
+ offs_reg = TMP_REG2;
+ mem &= REG_MASK;
+#endif /* SLJIT_CONFIG_PPC_64 */
+ } else { /* fold base + offset into TMP_REG2 and use it as the only address register */
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(mem & REG_MASK) | IMM(memw)));
+ if (memw > SIMM_MAX || memw < SIMM_MIN)
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(TMP_REG2) | IMM((memw + 0x8000) >> 16))); /* NOTE(review): the + 0x8000 presumably compensates for ADDI sign extending the low 16 bits - confirm */
+
+ mem = 0;
+ offs_reg = TMP_REG2;
+ }
+
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
+ if (dst & SLJIT_MEM)
+ return push_inst(compiler, STHBRX | S(src) | A(mem) | B(offs_reg));
+
+ FAIL_IF(push_inst(compiler, LHBRX | S(dst) | A(mem) | B(offs_reg)));
+
+ if (op == SLJIT_REV_U16)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, EXTSH | S(dst) | A(dst));
+ }
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (!is_32) {
+ if (dst & SLJIT_MEM) {
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+ return push_inst(compiler, STDBRX | S(src) | A(mem) | B(offs_reg));
+#else /* !POWER7 */
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); /* no STDBRX: two STWBRX stores, the second one SSIZE_OF(s32) bytes further; which half goes first depends on endianness */
+ FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(offs_reg)));
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
+ return push_inst(compiler, STWBRX | S(src) | A(mem) | B(TMP_REG2));
+#else /* !SLJIT_LITTLE_ENDIAN */
+ FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(mem) | B(offs_reg)));
+ FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32)));
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
+ return push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(TMP_REG2));
+#endif /* SLJIT_LITTLE_ENDIAN */
+#endif /* POWER7 */
+ }
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+ return push_inst(compiler, LDBRX | S(dst) | A(mem) | B(offs_reg));
+#else /* !POWER7 */
+ FAIL_IF(push_inst(compiler, LWBRX | LWBRX_FIRST_REG | A(mem) | B(offs_reg))); /* no LDBRX: two LWBRX loads into dst and TMP_REG1 (order chosen by the LWBRX_*_REG macros), merged with RLDIMI */
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
+ FAIL_IF(push_inst(compiler, LWBRX | LWBRX_SECOND_REG | A(mem) | B(TMP_REG2)));
+ return push_inst(compiler, RLDIMI | S(TMP_REG1) | A(dst) | RLDI_SH(32) | RLDI_MB(0));
+#endif /* POWER7 */
+ }
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+ if (dst & SLJIT_MEM)
+ return push_inst(compiler, STWBRX | S(src) | A(mem) | B(offs_reg));
+
+ FAIL_IF(push_inst(compiler, LWBRX | S(dst) | A(mem) | B(offs_reg)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (op == SLJIT_REV_S32)
+ return push_inst(compiler, EXTSW | S(dst) | A(dst));
+#endif /* SLJIT_CONFIG_PPC_64 */
+ return SLJIT_SUCCESS;
}
#define EMIT_MOV(type, type_flags, type_cast) \
- emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
+ emit_op(compiler, (src == SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? type_cast srcw : srcw)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@@ -1353,19 +1535,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
- if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) {
+ if (op <= SLJIT_MOV_P && FAST_IS_REG(src) && src == dst) {
if (!TYPE_CAST_NEEDED(op))
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op_flags & SLJIT_32) {
- if (op < SLJIT_NOT) {
+ if (op <= SLJIT_MOV_P) {
if (src & SLJIT_MEM) {
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
}
@@ -1410,16 +1592,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_S16:
return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
- case SLJIT_NOT:
- return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
-
case SLJIT_CLZ:
case SLJIT_CTZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- return emit_op(compiler, op, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
-#else
+ if (op_flags & SLJIT_32)
+ flags |= ALT_FORM1;
+#endif /* SLJIT_CONFIG_PPC_64 */
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
-#endif
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ op |= SLJIT_32;
+#endif /* SLJIT_CONFIG_PPC_64 */
+ /* fallthrough */
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ op |= (op_flags & SLJIT_32);
+#endif /* SLJIT_CONFIG_PPC_64 */
+ return emit_rev(compiler, op, dst, dstw, src, srcw);
}
return SLJIT_SUCCESS;
@@ -1427,40 +1619,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
#undef EMIT_MOV
+/* Macros for checking different operand types / values. */
#define TEST_SL_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
-
+ ((src) == SLJIT_IMM && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
#define TEST_UL_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define TEST_SH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
-#else
-#define TEST_SH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
-#endif
-
+ ((src) == SLJIT_IMM && !((srcw) & ~0xffff))
#define TEST_UH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))
+ ((src) == SLJIT_IMM && !((srcw) & ~(sljit_sw)0xffff0000))
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_SH_IMM(src, srcw) \
+ ((src) == SLJIT_IMM && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#define TEST_ADD_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
-#else
-#define TEST_ADD_IMM(src, srcw) \
- ((src) & SLJIT_IMM)
-#endif
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define TEST_UI_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
-#else
+ ((src) == SLJIT_IMM && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#define TEST_UI_IMM(src, srcw) \
- ((src) & SLJIT_IMM)
-#endif
+ ((src) == SLJIT_IMM && !((srcw) & ~0xffffffff))
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_FORM1(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \
|| (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY))
@@ -1470,14 +1644,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
#define TEST_SUB_FORM3(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \
|| (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z))
-#else
+
+#else /* !SLJIT_CONFIG_PPC_64 */
+#define TEST_SH_IMM(src, srcw) \
+ ((src) == SLJIT_IMM && !((srcw) & 0xffff))
+#define TEST_ADD_IMM(src, srcw) \
+ ((src) == SLJIT_IMM)
+#define TEST_UI_IMM(src, srcw) \
+ ((src) == SLJIT_IMM)
+
#define TEST_ADD_FORM1(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
#define TEST_SUB_FORM2(op) \
(GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL)
#define TEST_SUB_FORM3(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
-#endif
+#endif /* SLJIT_CONFIG_PPC_64 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@@ -1496,9 +1678,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (op & SLJIT_32) {
/* Most operations expect sign extended arguments. */
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)(src1w);
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)(src2w);
if (HAS_FLAGS(op))
flags |= ALT_SIGN_EXT;
@@ -1514,7 +1696,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (TEST_ADD_FORM1(op))
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
- if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+ if (!HAS_FLAGS(op) && (src1 == SLJIT_IMM || src2 == SLJIT_IMM)) {
if (TEST_SL_IMM(src2, src2w)) {
compiler->imm = (sljit_ins)src2w & 0xffff;
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1565,7 +1747,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
- return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == SLJIT_CARRY) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
@@ -1583,7 +1765,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
}
- if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
+ if (src2 == SLJIT_IMM && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
compiler->imm = (sljit_ins)src2w;
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1599,7 +1781,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
}
if (TEST_SUB_FORM2(op)) {
- if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) {
+ if (src2 == SLJIT_IMM && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) {
compiler->imm = (sljit_ins)src2w & 0xffff;
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1632,7 +1814,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
}
/* We know ALT_SIGN_EXT is set if it is an SLJIT_32 on 64 bit systems. */
- return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == SLJIT_CARRY) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
@@ -1657,9 +1839,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
+ case SLJIT_XOR:
+ if (src2 == SLJIT_IMM && src2w == -1) {
+ return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src1, src1w);
+ }
+ if (src1 == SLJIT_IMM && src1w == -1) {
+ return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src2, src2w);
+ }
+ /* fallthrough */
case SLJIT_AND:
case SLJIT_OR:
- case SLJIT_XOR:
/* Commutative unsigned operations. */
if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
if (TEST_UL_IMM(src2, src2w)) {
@@ -1704,7 +1893,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (op & SLJIT_32)
flags |= ALT_FORM2;
#endif
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
compiler->imm = (sljit_ins)src2w;
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1730,9 +1919,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
#undef TEST_SUB_FORM3
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_s32 is_right;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1744,85 +1934,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
#endif /* SLJIT_CONFIG_PPC_64 */
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src2 & SLJIT_IMM) {
- src2w &= bit_length - 1;
+ if (src3 == SLJIT_IMM) {
+ src3w &= bit_length - 1;
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w, TMP_REG2));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w, TMP_REG1));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
- src1 = TMP_REG1;
- }
-
- if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!(op & SLJIT_32)) {
if (is_right) {
- FAIL_IF(push_inst(compiler, SRDI(src2w) | S(src_dst) | A(src_dst)));
- return push_inst(compiler, RLDIMI | S(src1) | A(src_dst) | RLDI_SH(64 - src2w) | RLDI_MB(0));
+ FAIL_IF(push_inst(compiler, SRDI(src3w) | S(src1_reg) | A(dst_reg)));
+ return push_inst(compiler, RLDIMI | S(src2_reg) | A(dst_reg) | RLDI_SH(64 - src3w) | RLDI_MB(0));
}
- FAIL_IF(push_inst(compiler, SLDI(src2w) | S(src_dst) | A(src_dst)));
+ FAIL_IF(push_inst(compiler, SLDI(src3w) | S(src1_reg) | A(dst_reg)));
/* Computes SRDI(64 - src2w). */
- FAIL_IF(push_inst(compiler, RLDICL | S(src1) | A(TMP_REG1) | RLDI_SH(src2w) | RLDI_MB(64 - src2w)));
- return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
+ FAIL_IF(push_inst(compiler, RLDICL | S(src2_reg) | A(TMP_REG1) | RLDI_SH(src3w) | RLDI_MB(64 - src3w)));
+ return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
}
#endif /* SLJIT_CONFIG_PPC_64 */
if (is_right) {
- FAIL_IF(push_inst(compiler, SRWI(src2w) | S(src_dst) | A(src_dst)));
- return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(32 - src2w) | RLWI_MBE(0, src2w - 1));
+ FAIL_IF(push_inst(compiler, SRWI(src3w) | S(src1_reg) | A(dst_reg)));
+ return push_inst(compiler, RLWIMI | S(src2_reg) | A(dst_reg) | RLWI_SH(32 - src3w) | RLWI_MBE(0, src3w - 1));
}
- FAIL_IF(push_inst(compiler, SLWI(src2w) | S(src_dst) | A(src_dst)));
- return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(src2w) | RLWI_MBE(32 - src2w, 31));
+ FAIL_IF(push_inst(compiler, SLWI(src3w) | S(src1_reg) | A(dst_reg)));
+ return push_inst(compiler, RLWIMI | S(src2_reg) | A(dst_reg) | RLWI_SH(src3w) | RLWI_MBE(32 - src3w, 31));
+ }
+
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w, TMP_REG2));
+ src3 = TMP_REG2;
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!(op & SLJIT_32)) {
- if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
- FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x3f));
- src2 = TMP_REG2;
+ if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR || dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, ANDI | S(src3) | A(TMP_REG2) | 0x3f));
+ src3 = TMP_REG2;
}
- FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src_dst) | A(src_dst) | B(src2)));
- FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src1) | A(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x3f));
+ FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src1_reg) | A(dst_reg) | B(src3)));
+ FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src2_reg) | A(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XORI | S(src3) | A(TMP_REG2) | 0x3f));
FAIL_IF(push_inst(compiler, (is_right ? SLD : SRD) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
- return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
+ return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
}
#endif /* SLJIT_CONFIG_PPC_64 */
- if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
- FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
- src2 = TMP_REG2;
+ if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR || dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, ANDI | S(src3) | A(TMP_REG2) | 0x1f));
+ src3 = TMP_REG2;
}
- FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src_dst) | A(src_dst) | B(src2)));
- FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src1) | A(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x1f));
+ FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src1_reg) | A(dst_reg) | B(src3)));
+ FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src2_reg) | A(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XORI | S(src3) | A(TMP_REG2) | 0x1f));
FAIL_IF(push_inst(compiler, (is_right ? SLW : SRW) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
- return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
+ return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
+}
+
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ if (!(src & OFFS_REG_MASK)) {
+ if (srcw == 0 && (src & REG_MASK))
+ return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ /* Works with SLJIT_MEM0() case as well. */
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+ }
+
+ srcw &= 0x3;
+
+ if (srcw == 0)
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
+
+ FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1)));
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1854,21 +2056,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, MFLR | D(dst));
+
+ FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG1)));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + LR_SAVE_OFFSET, TMP_REG2));
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return reg_map[reg];
+
+ if (type != SLJIT_FLOAT_REGISTER)
+ return -1;
+
return freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -1879,24 +2112,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6))
#define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double))
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
-#else
-#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
-
-#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
-#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
-#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
-#else
-#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
-#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
-#endif
-
-#endif /* SLJIT_CONFIG_PPC_64 */
-
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1913,19 +2130,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (op == SLJIT_CONV_SW_FROM_F64) {
if (FAST_IS_REG(dst)) {
- FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
- return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
+ FAIL_IF(push_inst(compiler, STFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, LD | S(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
}
return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1);
}
-#else
+#else /* !SLJIT_CONFIG_PPC_64 */
FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
-#endif
+#endif /* SLJIT_CONFIG_PPC_64 */
if (FAST_IS_REG(dst)) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(load_immediate(compiler, TMP_REG1, TMP_MEM_OFFSET));
FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
- return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
+ return push_inst(compiler, LWZ | S(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
}
SLJIT_ASSERT(dst & SLJIT_MEM);
@@ -1935,16 +2152,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (dstw) {
FAIL_IF(push_inst(compiler, SLWI_W(dstw) | S(OFFS_REG(dst)) | A(TMP_REG1)));
dstw = TMP_REG1;
- }
- else
+ } else
dstw = OFFS_REG(dst);
}
else {
if ((dst & REG_MASK) && !dstw) {
dstw = dst & REG_MASK;
dst = 0;
- }
- else {
+ } else {
/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
dstw = TMP_REG1;
@@ -1954,85 +2169,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
-{
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-
- sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
-
- if (src & SLJIT_IMM) {
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- srcw = (sljit_s32)srcw;
-
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- src = TMP_REG1;
- }
- else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
- else
- FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
- src = TMP_REG1;
- }
-
- if (FAST_IS_REG(src)) {
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
- FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
- }
- else
- FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
-
- FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
-
- if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
- if (op & SLJIT_32)
- return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
- return SLJIT_SUCCESS;
-
-#else
-
- sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- sljit_s32 invert_sign = 1;
-
- if (src & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000));
- src = TMP_REG1;
- invert_sign = 0;
- }
- else if (!FAST_IS_REG(src)) {
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
- src = TMP_REG1;
- }
-
- /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
- The double precision format has exactly 53 bit precision, so the lower 32 bit represents
- the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
- to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
- point value, we need to subtract 2^53 + 2^31 from the constructed value. */
- FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
- if (invert_sign)
- FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1));
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
- FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
- FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
- FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
-
- FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
-
- if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
- if (op & SLJIT_32)
- return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
- return SLJIT_SUCCESS;
-
-#endif
-}
-
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -2051,13 +2187,10 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
switch (GET_FLAG_TYPE(op)) {
case SLJIT_UNORDERED_OR_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL:
return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11));
case SLJIT_UNORDERED_OR_LESS:
- case SLJIT_ORDERED_GREATER_EQUAL:
return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11));
case SLJIT_UNORDERED_OR_GREATER:
- case SLJIT_ORDERED_LESS_EQUAL:
return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11));
}
@@ -2143,18 +2276,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_ADD_F64:
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
break;
-
case SLJIT_SUB_F64:
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
break;
-
case SLJIT_MUL_F64:
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
break;
-
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
break;
+ case SLJIT_COPYSIGN_F64:
+ FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STFS : STFD) | FS(src2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, LWZ | S(TMP_REG1) | A(SLJIT_SP) | ((op & SLJIT_32) ? TMP_MEM_OFFSET : TMP_MEM_OFFSET_HI)));
+#else /* !SLJIT_CONFIG_PPC_32 */
+ FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? LWZ : LD) | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+#endif /* SLJIT_CONFIG_PPC_32 */
+ FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src1)));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(TMP_REG1) | 0));
+#else /* !SLJIT_CONFIG_PPC_32 */
+ FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((op & SLJIT_32) ? 0 : 1)) | A(TMP_REG1) | 0));
+#endif /* SLJIT_CONFIG_PPC_32 */
+ FAIL_IF(push_inst(compiler, BCx | (4 << 21) | (0 << 16) | 8));
+ return push_inst(compiler, FNEG | FD(dst_r) | FB(dst_r));
}
if (dst & SLJIT_MEM)
@@ -2165,22 +2310,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
#undef SELECT_FOP
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
{
+ union {
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
- if (FAST_IS_REG(dst))
- return push_inst(compiler, MFLR | D(dst));
+ u.value = value;
- /* Memory. */
- FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
- return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ if (u.imm != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
+
+ FAIL_IF(push_inst(compiler, STW | S(u.imm != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET));
+ return push_inst(compiler, LFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
}
/* --------------------------------------------------------------------- */
@@ -2303,7 +2450,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
- if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY)
+ if ((type | 0x1) == SLJIT_NOT_CARRY)
PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO)));
/* In PPC, we don't need to touch the arguments. */
@@ -2324,6 +2471,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@@ -2360,7 +2509,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */
src_r = src;
#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
/* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
@@ -2390,6 +2539,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -2572,14 +2723,106 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
+{
+ sljit_ins *ptr;
+ sljit_uw size;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
+#else /* !SLJIT_CONFIG_PPC_64 */
+ sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (dst_reg != src2_reg) {
+ if (dst_reg == src1) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1; /* operands were swapped, so select on the opposite condition */
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+ FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG2) | B(dst_reg))); /* keep old dst_reg in TMP_REG2 for src1's address */
+
+ if ((src1 & REG_MASK) == dst_reg)
+ src1 = (src1 & ~REG_MASK) | TMP_REG2;
+
+ if (OFFS_REG(src1) == dst_reg)
+ src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2);
+ }
+
+ FAIL_IF(push_inst(compiler, OR | S(src2_reg) | A(dst_reg) | B(src2_reg))); /* dst_reg = src2_reg */
+ }
+ }
+
+ if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY)
+ FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO)));
+
+ size = compiler->size;
+
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+ FAIL_IF(!ptr);
+ compiler->size++; /* reserve one instruction slot; the conditional branch is written into *ptr below */
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w, TMP_REG1));
+ } else if (src1 == SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w; /* 32-bit select: load the sign-extended 32-bit immediate */
+#endif /* SLJIT_CONFIG_PPC_64 */
+ FAIL_IF(load_immediate(compiler, dst_reg, src1w));
+ } else
+ FAIL_IF(push_inst(compiler, OR | S(src1) | A(dst_reg) | B(src1)));
+
+ *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2);
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
{
+ sljit_ins *ptr;
+ sljit_uw size;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src2_freg)));
+ }
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
+ if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY)
+ FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO)));
+
+ size = compiler->size;
+
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+ FAIL_IF(!ptr);
+ compiler->size++;
+
+ if (src1 & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w, TMP_REG1));
+ else
+ FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src1)));
+
+ *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2);
+ return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
@@ -2813,7 +3056,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
- PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+ PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, dst_r, dst, dstw, TMP_REG1));
return const_;
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_32.c
index b38e6924c8..396c956c19 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_32.c
@@ -27,7 +27,6 @@
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
{
SLJIT_UNUSED_ARG(tmp_r);
- SLJIT_ASSERT(dst_r != tmp_r);
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
@@ -43,6 +42,76 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r
return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ union {
+ sljit_s32 imm[2];
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm[0] != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0], TMP_REG3));
+ if (u.imm[1] != 0)
+ FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1], TMP_REG3));
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-16)));
+ FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | (8 << 7)));
+ FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(u.imm[1] != 0 ? TMP_REG2 : TMP_ZERO) | (12 << 7)));
+ FAIL_IF(push_inst(compiler, FLD | FRD(freg) | RS1(SLJIT_SP) | IMM_I(8)));
+ return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(16));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_ins inst;
+ sljit_s32 reg2 = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (op & SLJIT_32) {
+ if (op == SLJIT_COPY32_TO_F32)
+ inst = FMV_W_X | RS1(reg) | FRD(freg);
+ else
+ inst = FMV_X_W | FRS1(freg) | RD(reg);
+
+ return push_inst(compiler, inst);
+ }
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-16)));
+
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_SECOND(reg);
+ reg = REG_PAIR_FIRST(reg);
+ }
+
+ if (op == SLJIT_COPY_TO_F64) {
+ if (reg2 != 0)
+ FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(reg2) | (8 << 7)));
+ else
+ FAIL_IF(push_inst(compiler, FSW | RS1(SLJIT_SP) | FRS2(freg) | (8 << 7)));
+
+ FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(reg) | (12 << 7)));
+ FAIL_IF(push_inst(compiler, FLD | FRD(freg) | RS1(SLJIT_SP) | IMM_I(8)));
+ } else {
+ FAIL_IF(push_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(freg) | (8 << 7)));
+
+ if (reg2 != 0)
+ FAIL_IF(push_inst(compiler, FMV_X_W | FRS1(freg) | RD(reg2)));
+
+ FAIL_IF(push_inst(compiler, LW | RD(reg) | RS1(SLJIT_SP) | IMM_I(12)));
+ }
+
+ return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(16));
+}
+
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
if ((init_value & 0x800) != 0)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_64.c
index 32cec7848d..7fcf2c5273 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_64.c
@@ -28,8 +28,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r
{
sljit_sw high;
- SLJIT_ASSERT(dst_r != tmp_r);
-
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
@@ -81,6 +79,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r
return SLJIT_SUCCESS;
}
+ SLJIT_ASSERT(dst_r != tmp_r);
+
high = imm >> 32;
imm = (sljit_s32)imm;
@@ -126,6 +126,45 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r
return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ union {
+ sljit_sw imm;
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm == 0)
+ return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_ZERO) | FRD(freg));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3));
+ return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_REG1) | FRD(freg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
+ inst = FMV_W_X | RS1(reg) | FRD(freg);
+ else
+ inst = FMV_X_W | FRS1(freg) | RD(reg);
+
+ if (!(op & SLJIT_32))
+ inst |= (sljit_ins)1 << 25;
+
+ return push_inst(compiler, inst);
+}
+
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
sljit_sw high;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_common.c
index 58a48c649c..64bd411d9d 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeRISCV_common.c
@@ -97,16 +97,20 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FLD (F3(0x3) | OPC(0x7))
#define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53))
#define FLT_S (F7(0x50) | F3(0x1) | OPC(0x53))
-#define FSD (F3(0x3) | OPC(0x27))
/* These conversion opcodes are partly defined. */
#define FCVT_S_D (F7(0x20) | OPC(0x53))
#define FCVT_S_W (F7(0x68) | OPC(0x53))
+#define FCVT_S_WU (F7(0x68) | F12(0x1) | OPC(0x53))
#define FCVT_W_S (F7(0x60) | F3(0x1) | OPC(0x53))
#define FMUL_S (F7(0x8) | F3(0x7) | OPC(0x53))
+#define FMV_X_W (F7(0x70) | F3(0x0) | OPC(0x53))
+#define FMV_W_X (F7(0x78) | F3(0x0) | OPC(0x53))
+#define FSD (F3(0x3) | OPC(0x27))
#define FSGNJ_S (F7(0x10) | F3(0x0) | OPC(0x53))
#define FSGNJN_S (F7(0x10) | F3(0x1) | OPC(0x53))
#define FSGNJX_S (F7(0x10) | F3(0x2) | OPC(0x53))
#define FSUB_S (F7(0x4) | F3(0x7) | OPC(0x53))
+#define FSW (F3(0x2) | OPC(0x27))
#define JAL (OPC(0x6f))
#define JALR (F3(0x0) | OPC(0x67))
#define LD (F3(0x3) | OPC(0x3))
@@ -344,13 +348,12 @@ static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg)
if ((addr & 0x80000000l) != 0)
high = ~high;
- if ((high & 0x800) != 0)
- high += 0x1000;
-
if (flags & PATCH_ABS52) {
SLJIT_ASSERT(addr <= S52_MAX);
inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12);
} else {
+ if ((high & 0x800) != 0)
+ high += 0x1000;
inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff);
inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high);
inst++;
@@ -531,7 +534,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#elif defined(__riscv_float_abi_soft)
+ return 0;
+#else
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
case SLJIT_HAS_ZERO_REGISTER:
+ case SLJIT_HAS_COPY_F32:
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ case SLJIT_HAS_COPY_F64:
+#endif /* SLJIT_CONFIG_RISCV_64 */
return 1;
default:
return 0;
@@ -540,7 +554,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
+ switch (type) {
+ case SLJIT_UNORDERED_OR_EQUAL:
+ case SLJIT_ORDERED_NOT_EQUAL:
+ return 2;
+
+ case SLJIT_UNORDERED:
+ case SLJIT_ORDERED:
+ return 1;
+ }
+
+ return 0;
}
/* --------------------------------------------------------------------- */
@@ -610,10 +634,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
if ((local_size & SSIZE_OF(sw)) != 0)
local_size += SSIZE_OF(sw);
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
}
#else
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
#endif
local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
compiler->local_size = local_size;
@@ -704,10 +728,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
if ((local_size & SSIZE_OF(sw)) != 0)
local_size += SSIZE_OF(sw);
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
}
#else
- local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
#endif
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
@@ -915,7 +939,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
/* Since tmp can be the same as base or offset registers,
* these might be unavailable after modifying tmp. */
- if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+ if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA) && reg == TMP_REG2)
tmp_r = reg;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
@@ -1031,9 +1055,11 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
#define WORD 0
+#define WORD_32 0 /* RV32: no extra opcode bit; emit_rev ORs this into its 32-bit shifts. */
#define IMM_EXTEND(v) (IMM_I(v))
#else /* !SLJIT_CONFIG_RISCV_32 */
#define WORD word
+#define WORD_32 0x08 /* Same 0x8 value WORD takes when SLJIT_32 is set; presumably selects the 32-bit (*W) instruction form unconditionally - confirm. */
#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v))))
#endif /* SLJIT_CONFIG_RISCV_32 */
@@ -1041,16 +1067,16 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
{
sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
- sljit_ins max = (op & SLJIT_32) ? 32 : 64;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
+ sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
- sljit_ins max = 32;
+ sljit_ins word_size = 32;
#endif /* SLJIT_CONFIG_RISCV_64 */
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
/* The OTHER_FLAG is the counter. */
- FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(max)));
+ FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(word_size)));
/* The TMP_REG2 is the next value. */
if (src != TMP_REG2)
@@ -1066,7 +1092,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));
/* The TMP_REG1 is the next shift. */
- FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(max)));
+ FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(word_size)));
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0)));
FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1)));
@@ -1081,6 +1107,65 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj
return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(OTHER_FLAG) | IMM_I(0));
}
+static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) /* Emits code that writes the byte-reversed value of register src into dst; returns an sljit error code. */
+{ /* Scratch registers written: TMP_REG1 and OTHER_FLAG, plus EQUAL_FLAG on the 64-bit path. */
+ SLJIT_UNUSED_ARG(op); /* op is only read inside the RISCV_64 block below. */
+
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ if (!(op & SLJIT_32)) { /* 64-bit value: swap 32-bit halves, then 16-bit halves, then bytes. */
+ FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0x10000)); /* Mask construction starts: OTHER_FLAG = 0x10000. */
+ FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(src) | IMM_I(32))); /* TMP_REG1 = upper half moved down. */
+ FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xfff))); /* Presumably adds -1 (12-bit immediate), giving 0xffff - depends on IMM_I. */
+ FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src) | IMM_I(32))); /* dst = lower half moved up. */
+ FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(32))); /* Copy of the mask for the upper 32 bits. */
+ FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1))); /* 32-bit halves are now swapped. */
+ FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG))); /* Mask selects the low 16 bits of each 32-bit half. */
+
+ FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(16))); /* Second stage: exchange 16-bit units inside each 32-bit half. */
+ FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(8))); /* Prepare the next mask while the current one is still in use. */
+ FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(16)));
+ FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG))); /* mask ^ (mask << 8): selects the low byte of each 16-bit unit. */
+ FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)));
+
+ FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(8))); /* Final stage: exchange the two bytes of every 16-bit unit. */
+ FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(8)));
+ return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
+ }
+#endif /* SLJIT_CONFIG_RISCV_64 */
+
+ FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(src) | IMM_I(16))); /* 32-bit value: swap 16-bit halves first; shifts carry WORD_32. */
+ FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0xff0000)); /* Mask construction: OTHER_FLAG = 0x00ff0000. */
+ FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(src) | IMM_I(16)));
+ FAIL_IF(push_inst(compiler, ORI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xff))); /* Mask becomes 0x00ff00ff: low byte of each 16-bit unit. */
+ FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)));
+
+ FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(dst) | IMM_I(8))); /* Then exchange the two bytes of every 16-bit unit. */
+ FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
+ FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(dst) | IMM_I(8)));
+ return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
+}
+
+static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) /* Emits code that swaps the two low bytes of src into dst; clobbers TMP_REG1; returns an sljit error code. */
+{
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; /* Consumed through the WORD macro below. */
+ sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; /* Operand width in bits. */
+#else /* !SLJIT_CONFIG_RISCV_64 */
+ sljit_ins word_size = 32;
+#endif /* SLJIT_CONFIG_RISCV_64 */
+
+ FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(src) | IMM_I(8))); /* TMP_REG1 = src >> 8, so original bits 15..8 sit in the low byte. */
+ FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src) | IMM_I(word_size - 8))); /* Original low byte moves to the top byte of the operand. */
+ FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(0xff))); /* Keep only the byte that becomes the new low byte. */
+ FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI) | WORD | RD(dst) | RS1(dst) | IMM_I(word_size - 16))); /* Bring it down to bits 15..8: logical shift for REV_U16 (zero fill), arithmetic otherwise (sign fill). */
+ return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
+}
+
#define EMIT_LOGICAL(op_imm, op_reg) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
@@ -1105,7 +1190,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
sljit_s32 is_overflow, is_carry, carry_src_r, is_handled;
sljit_ins op_imm, op_reg;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
#endif /* SLJIT_CONFIG_RISCV_64 */
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
@@ -1174,10 +1259,33 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
return emit_clz_ctz(compiler, op, dst, src2);
+ case SLJIT_REV: /* Byte reversal of the operand held in src2; src1 is the unused TMP_REG1 placeholder. */
+ case SLJIT_REV_S32:
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+ case SLJIT_REV_U32:
+#endif /* SLJIT_CONFIG_RISCV_32 */
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+ return emit_rev(compiler, op, dst, src2);
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+ return emit_rev16(compiler, op, dst, src2);
+
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ case SLJIT_REV_U32: /* RV64 only: 32-bit reversal followed by an explicit zero extension. */
+ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1);
+ FAIL_IF(emit_rev(compiler, op, dst, src2));
+ if (dst == TMP_REG2) /* NOTE(review): presumably a TMP_REG2 result is only stored or discarded by the caller, so the extension is skipped - confirm. */
+ return SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(32))); /* Shift left then logical right by 32 clears the upper 32 bits. */
+ return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32));
+#endif /* SLJIT_CONFIG_RISCV_64 */
+
case SLJIT_ADD:
/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
- carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_overflow) {
@@ -1233,7 +1341,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG));
case SLJIT_ADDC:
- carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
@@ -1280,11 +1388,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
is_handled = 0;
if (flags & SRC2_IMM) {
- if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
is_handled = 1;
}
- else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
is_handled = 1;
}
@@ -1301,19 +1409,15 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
switch (GET_FLAG_TYPE(op)) {
case SLJIT_LESS:
- case SLJIT_GREATER_EQUAL:
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
break;
case SLJIT_GREATER:
- case SLJIT_LESS_EQUAL:
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
break;
case SLJIT_SIG_LESS:
- case SLJIT_SIG_GREATER_EQUAL:
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
break;
case SLJIT_SIG_GREATER:
- case SLJIT_SIG_LESS_EQUAL:
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
break;
}
@@ -1336,7 +1440,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
- is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_overflow) {
@@ -1385,7 +1489,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
flags &= ~SRC2_IMM;
}
- is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+ is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
if (flags & SRC2_IMM) {
if (is_carry)
@@ -1534,9 +1638,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
compiler->cache_argw = 0;
}
- if (dst == TMP_REG2) {
+ if (dst == 0) {
SLJIT_ASSERT(HAS_FLAGS(op));
flags |= UNUSED_DEST;
+ dst = TMP_REG2;
}
else if (FAST_IS_REG(dst)) {
dst_r = dst;
@@ -1548,11 +1653,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
flags |= SLOW_DEST;
if (flags & IMM_OP) {
- if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+ if (src2 == SLJIT_IMM && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
flags |= SRC2_IMM;
src2_r = src2w;
}
- else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+ else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
flags |= SRC2_IMM;
src2_r = src1w;
@@ -1569,7 +1674,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
if (src1w) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
src1_r = TMP_REG1;
@@ -1592,7 +1697,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
dst_r = (sljit_s32)src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
if (!(flags & SRC2_IMM)) {
if (src2w) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3));
@@ -1649,7 +1754,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
SLJIT_ASSERT(word == 0 || word == 0x8);
#endif /* SLJIT_CONFIG_RISCV_64 */
@@ -1718,32 +1823,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
case SLJIT_MOV_U32:
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
case SLJIT_MOV_S32:
/* Logical operators have no W variant, so sign extended input is necessary for them. */
case SLJIT_MOV32:
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
#endif
case SLJIT_MOV_U8:
- return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
- case SLJIT_NOT:
- return emit_op(compiler, SLJIT_XOR | (op & (SLJIT_32 | SLJIT_SET_Z)), flags, dst, dstw, src, srcw, SLJIT_IMM, -1);
+ return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
+ case SLJIT_REV:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
}
SLJIT_UNREACHABLE();
@@ -1766,9 +1877,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
if (op & SLJIT_32) {
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)src1w;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)src2w;
}
#endif
@@ -1801,7 +1912,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
src2w &= 0x1f;
#else /* !SLJIT_CONFIG_RISCV_32 */
@@ -1827,18 +1938,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
+ return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_s32 is_left;
sljit_ins ins1, ins2, ins3;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
@@ -1849,50 +1961,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src2 & SLJIT_IMM) {
- src2w &= bit_length - 1;
+ if (src3 == SLJIT_IMM) {
+ src3w &= bit_length - 1;
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w));
- src2 = TMP_REG2;
- }
- if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w));
- src1 = TMP_REG1;
- } else if (src1 & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
- src1 = TMP_REG1;
- }
-
- if (src2 & SLJIT_IMM) {
if (is_left) {
- ins1 = SLLI | WORD | IMM_I(src2w);
- src2w = bit_length - src2w;
- ins2 = SRLI | WORD | IMM_I(src2w);
+ ins1 = SLLI | WORD | IMM_I(src3w);
+ src3w = bit_length - src3w;
+ ins2 = SRLI | WORD | IMM_I(src3w);
} else {
- ins1 = SRLI | WORD | IMM_I(src2w);
- src2w = bit_length - src2w;
- ins2 = SLLI | WORD | IMM_I(src2w);
+ ins1 = SRLI | WORD | IMM_I(src3w);
+ src3w = bit_length - src3w;
+ ins2 = SLLI | WORD | IMM_I(src3w);
}
- FAIL_IF(push_inst(compiler, ins1 | RD(src_dst) | RS1(src_dst)));
- FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src1)));
- return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1));
+ FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RS1(src1_reg)));
+ FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src2_reg)));
+ return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1));
+ }
+
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
+ src3 = TMP_REG2;
+ } else if (dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src3) | IMM_I(0))); /* dst_reg is overwritten by the first shift below, so keep the shift amount in TMP_REG2; a failed emission is now propagated like every other one in this function. */
+ src3 = TMP_REG2;
+ }
if (is_left) {
@@ -1905,21 +2011,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
ins3 = SLL;
}
- FAIL_IF(push_inst(compiler, ins1 | WORD | RD(src_dst) | RS1(src_dst) | RS2(src2)));
+ FAIL_IF(push_inst(compiler, ins1 | WORD | RD(dst_reg) | RS1(src1_reg) | RS2(src3)));
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
- FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src1) | IMM_I(1)));
- FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src2) | IMM_I((sljit_ins)bit_length - 1)));
- src1 = TMP_REG1;
+ FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src2_reg) | IMM_I(1)));
+ FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src3) | IMM_I((sljit_ins)bit_length - 1)));
+ src2_reg = TMP_REG1;
} else
- FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src2)));
+ FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src3)));
- FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src1) | RS2(TMP_REG2)));
- return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1));
+ FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src2_reg) | RS2(TMP_REG2)));
+ return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1));
}
-#undef WORD
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
@@ -1947,21 +2051,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, /* Emits SLJIT_FAST_ENTER or SLJIT_GET_RETURN_ADDRESS, writing the result to dst (register or memory). */
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst)) /* Register destination: copy RETURN_ADDR_REG with a single ADDI and finish. */
+ return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0));
+
+ SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2); /* The shared store below writes TMP_REG2, so the two must be the same register. */
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; /* Load straight into dst when it is a register, otherwise stage in TMP_REG2. */
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw))); /* Reads the word at SP + local_size - sizeof(sw). */
+ break;
+ }
+
+ if (dst & SLJIT_MEM) /* Memory destination: store the value held in TMP_REG2. */
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) /* Maps an sljit register of the given class to its reg_map/freg_map entry; -1 for any other class. */
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER) /* General purpose registers are looked up in reg_map. */
+ return reg_map[reg];
+
+ if (type != SLJIT_FLOAT_REGISTER) /* Neither GP nor float: no mapping is provided. */
+ return -1;
+
return freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -2008,51 +2143,73 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
#endif
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, /* Shared tail of the signed/unsigned integer-to-float conversions; ins is the prebuilt instruction without its register fields. */
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_ins inst;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21;
-#endif
-
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
-#else
- FAIL_IF(emit_op_mem2(compiler, (flags ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
-#endif
+#else /* !SLJIT_CONFIG_RISCV_32 */
+ FAIL_IF(emit_op_mem2(compiler, ((ins & (1 << 21)) ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); /* Bit 21 of ins is set by the callers for the full-word (SW/UW) source variants. */
+#endif /* SLJIT_CONFIG_RISCV_32 */
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- srcw = (sljit_s32)srcw;
-#endif
-
+ } else if (src == SLJIT_IMM) { /* Immediates arrive already narrowed by the caller. */
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3));
src = TMP_REG1;
}
- inst = FCVT_S_W | FMT(op) | FRD(dst_r) | RS1(src);
+ FAIL_IF(push_inst(compiler, ins | FRD(dst_r) | RS1(src)));
+
+ if (dst & SLJIT_MEM) /* Result was produced in TMP_FREG1; store it to the memory destination. */
+ return emit_op_mem2(compiler, DOUBLE_DATA | ((sljit_s32)(~ins >> 24) & 0x2), TMP_FREG1, dst, dstw, 0, 0); /* Adds flag 0x2 exactly when bit 25 of ins is clear; presumably that marks a single precision result - confirm against FMT. */
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, /* Builds the signed integer-to-float conversion instruction, then defers to sljit_emit_fop1_conv_f64_from_w. */
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins = FCVT_S_W | FMT(op);
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
if (op & SLJIT_32)
- inst |= F3(0x7);
-#else
- inst |= flags;
+ ins |= F3(0x7); /* NOTE(review): F3(0x7) presumably selects the dynamic rounding mode - confirm against the F3 macro. */
+#else /* !SLJIT_CONFIG_RISCV_32 */
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
+ ins |= (1 << 21); /* Marks a full-word source; the shared helper also tests this bit to pick the load width. */
+ else if (src == SLJIT_IMM)
+ srcw = (sljit_s32)srcw; /* 32-bit source: narrow the immediate to its signed 32-bit value. */
if (op != SLJIT_CONV_F64_FROM_S32)
- inst |= F3(0x7);
-#endif
+ ins |= F3(0x7);
+#endif /* SLJIT_CONFIG_RISCV_32 */
- FAIL_IF(push_inst(compiler, inst));
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
+}
- if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
- return SLJIT_SUCCESS;
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, /* Builds the unsigned integer-to-float conversion instruction, then defers to sljit_emit_fop1_conv_f64_from_w. */
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins = FCVT_S_WU | FMT(op);
+
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+ if (op & SLJIT_32)
+ ins |= F3(0x7); /* NOTE(review): F3(0x7) presumably selects the dynamic rounding mode - confirm against the F3 macro. */
+#else /* !SLJIT_CONFIG_RISCV_32 */
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
+ ins |= (1 << 21); /* Marks a full-word source; the shared helper also tests this bit to pick the load width. */
+ else if (src == SLJIT_IMM)
+ srcw = (sljit_u32)srcw; /* 32-bit source: narrow the immediate to its unsigned 32-bit value. */
+
+ if (op != SLJIT_CONV_F64_FROM_S32) /* NOTE(review): this unsigned variant compares against the signed opcode, so the test is presumably always true here - confirm whether SLJIT_CONV_F64_FROM_U32 was intended. */
+ ins |= F3(0x7);
+#endif /* SLJIT_CONFIG_RISCV_32 */
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2073,40 +2230,36 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
switch (GET_FLAG_TYPE(op)) {
case SLJIT_F_EQUAL:
- case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_UNORDERED_OR_NOT_EQUAL:
inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
break;
case SLJIT_F_LESS:
- case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_LESS:
- case SLJIT_UNORDERED_OR_GREATER_EQUAL:
inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
break;
case SLJIT_ORDERED_GREATER:
- case SLJIT_UNORDERED_OR_LESS_EQUAL:
inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1);
break;
case SLJIT_F_GREATER:
- case SLJIT_F_LESS_EQUAL:
case SLJIT_UNORDERED_OR_GREATER:
- case SLJIT_ORDERED_LESS_EQUAL:
inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
break;
case SLJIT_UNORDERED_OR_LESS:
- case SLJIT_ORDERED_GREATER_EQUAL:
inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1);
break;
- case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
- case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+ case SLJIT_UNORDERED_OR_EQUAL:
FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2)));
FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1)));
inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1);
break;
- default: /* SLJIT_UNORDERED, SLJIT_ORDERED */
- FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(TMP_FREG1) | FRS1(src1) | FRS2(src2)));
- inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(TMP_FREG1) | FRS2(TMP_FREG1);
+ default: /* SLJIT_UNORDERED */
+ if (src1 == src2) {
+ inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1);
+ break;
+ }
+ FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1)));
+ FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src2)));
+ inst = AND | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1);
break;
}
@@ -2233,6 +2386,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
break;
+
+ case SLJIT_COPYSIGN_F64:
+ return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2));
}
if (dst_r == TMP_FREG2)
@@ -2241,24 +2397,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
-#undef FLOAT_DATA
-#undef FMT
-
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, /* Emits code that loads the 32-bit float constant value into float register freg. */
+ sljit_s32 freg, sljit_f32 value)
{
+ union { /* Reinterprets the float's bits as a 32-bit integer without pointer casts. */
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
- if (FAST_IS_REG(dst))
- return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0));
+ if (u.imm == 0) /* All-zero bit pattern: move from TMP_ZERO directly, no immediate load needed. */
+ return push_inst(compiler, FMV_W_X | RS1(TMP_ZERO) | FRD(freg));
- /* Memory. */
- return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
+ FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3)); /* Any other bit pattern is materialised in TMP_REG1 first. */
+ return push_inst(compiler, FMV_W_X | RS1(TMP_REG1) | FRD(freg));
}
/* --------------------------------------------------------------------- */
@@ -2287,26 +2443,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7)
#endif
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
+static sljit_ins get_jump_instruction(sljit_s32 type)
{
- struct sljit_jump *jump;
- sljit_ins inst;
-
- CHECK_ERROR_PTR();
- CHECK_PTR(check_sljit_emit_jump(compiler, type));
-
- jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
- PTR_FAIL_IF(!jump);
- set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
- type &= 0xff;
-
switch (type) {
case SLJIT_EQUAL:
- inst = BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
- break;
+ return BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO);
case SLJIT_NOT_EQUAL:
- inst = BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
- break;
+ return BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO);
case SLJIT_LESS:
case SLJIT_GREATER:
case SLJIT_SIG_LESS:
@@ -2315,7 +2458,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_CARRY:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
- case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+ case SLJIT_ORDERED_NOT_EQUAL:
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
case SLJIT_ORDERED_GREATER:
@@ -2323,7 +2466,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_ORDERED_LESS_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
case SLJIT_ORDERED:
- inst = BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
+ return BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO);
break;
case SLJIT_GREATER_EQUAL:
case SLJIT_LESS_EQUAL:
@@ -2333,7 +2476,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_NOT_CARRY:
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
- case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+ case SLJIT_UNORDERED_OR_EQUAL:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_LESS_EQUAL:
@@ -2341,16 +2484,30 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_UNORDERED_OR_GREATER:
case SLJIT_UNORDERED_OR_LESS:
case SLJIT_UNORDERED:
- inst = BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
- break;
+ return BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO);
default:
/* Not conditional branch. */
- inst = 0;
- break;
+ return 0;
}
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
+{
+ struct sljit_jump *jump;
+ sljit_ins inst;
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+ PTR_FAIL_IF(!jump);
+ set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+ type &= 0xff;
+
+ inst = get_jump_instruction(type);
if (inst != 0) {
- PTR_FAIL_IF(push_inst(compiler, inst));
+ PTR_FAIL_IF(push_inst(compiler, inst | BRANCH_LENGTH));
jump->flags |= IS_COND;
}
@@ -2420,7 +2577,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
if (src1w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
src1 = TMP_REG1;
@@ -2429,7 +2586,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
src1 = TMP_ZERO;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (src2w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3));
src2 = TMP_REG2;
@@ -2499,7 +2656,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
@@ -2641,16 +2798,110 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
+{
+ sljit_ins *ptr;
+ sljit_uw size;
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ sljit_ins word = (sljit_ins)(type & SLJIT_32) >> 5;
+ sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
+#else /* !SLJIT_CONFIG_RISCV_64 */
+ sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
+#endif /* SLJIT_CONFIG_RISCV_64 */
+
+ SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (dst_reg != src2_reg) {
+ if (dst_reg == src1) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+ FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(dst_reg) | IMM_I(0)));
+
+ if ((src1 & REG_MASK) == dst_reg)
+ src1 = (src1 & ~REG_MASK) | TMP_REG2;
+
+ if (OFFS_REG(src1) == dst_reg)
+ src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2);
+ }
+
+ FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0)));
+ }
+ }
+
+ size = compiler->size;
+
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+ FAIL_IF(!ptr);
+ compiler->size++;
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
+ } else if (src1 == SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+ if (word)
+ src1w = (sljit_s32)src1w;
+#endif /* SLJIT_CONFIG_RISCV_64 */
+ FAIL_IF(load_immediate(compiler, dst_reg, src1w, TMP_REG1));
+ } else
+ FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0)));
+
+ *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9);
+ return SLJIT_SUCCESS;
+}
+
+#undef WORD
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
{
+ sljit_ins *ptr;
+ sljit_uw size;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src2_freg) | FRS2(src2_freg)));
+ }
+
+ size = compiler->size;
+
+ ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+ FAIL_IF(!ptr);
+ compiler->size++;
+
+ if (src1 & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w));
+ else
+ FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1)));
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
+ *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9);
+ return SLJIT_SUCCESS;
}
+#undef FLOAT_DATA
+#undef FMT
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
index 8b51bad9bc..67516f9b32 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
@@ -47,8 +47,8 @@ static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
};
/* there are also a[2-15] available, but they are slower to access and
@@ -83,7 +83,7 @@ static const sljit_gpr r10 = 10; /* reg_map[9] */
static const sljit_gpr r11 = 11; /* reg_map[10] */
static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
-static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */
+static const sljit_gpr r14 = 14; /* reg_map[0]: return address */
static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
@@ -96,20 +96,16 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac
#define tmp0 r0
#define tmp1 r1
-/* TODO(carenas): flags should move to a different register so that
- * link register doesn't need to change
- */
-
/* When reg cannot be unused. */
#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
/* Link register. */
static const sljit_gpr link_r = 14; /* r14 */
-#define TMP_FREG1 (0)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
- 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
+ 0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};
#define R0A(r) (r)
@@ -126,7 +122,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
+#define F12(r) (R12A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
+#define F28(r) (R28A((sljit_ins)freg_map[r]))
+#define F32(r) (R32A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))
struct sljit_s390x_const {
@@ -141,12 +140,6 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
return reg_map[r];
}
-static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
-{
- SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
- return freg_map[r];
-}
-
/* Size of instruction in bytes. Tags must already be cleared. */
static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
{
@@ -217,6 +210,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
}
/* fallthrough */
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
return cc0;
@@ -236,6 +230,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
return (cc1 | cc2 | cc3);
case SLJIT_LESS:
+ case SLJIT_ATOMIC_NOT_STORED:
return cc1;
case SLJIT_GREATER_EQUAL:
@@ -454,10 +449,12 @@ HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
{
+ sljit_uw dh, dl;
+
SLJIT_ASSERT(is_s20(d));
- sljit_uw dh = (d >> 12) & 0xff;
- sljit_uw dl = (d << 8) & 0xfff00;
+ dh = (d >> 12) & 0xff;
+ dl = ((sljit_uw)d << 8) & 0xfff00;
return (dh | dl) << 8;
}
@@ -899,23 +896,17 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t
if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
- /* 6 byte instructions (requires extended immediate facility) */
- if (have_eimm()) {
- if (is_s32(v))
- return push_inst(compiler, lgfi(target, (sljit_s32)v));
+ if (is_s32(v))
+ return push_inst(compiler, lgfi(target, (sljit_s32)v));
- if (((sljit_uw)v >> 32) == 0)
- return push_inst(compiler, llilf(target, (sljit_u32)v));
+ if (((sljit_uw)v >> 32) == 0)
+ return push_inst(compiler, llilf(target, (sljit_u32)v));
- if (((sljit_uw)v << 32) == 0)
- return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
-
- FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
- return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
- }
+ if (((sljit_uw)v << 32) == 0)
+ return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
- /* TODO(mundaym): instruction sequences that don't use extended immediates */
- abort();
+ FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
+ return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
}
struct addr {
@@ -995,24 +986,47 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
/* May clobber tmp1. */
-static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
- sljit_s32 src, sljit_sw srcw,
- sljit_s32 is_32bit)
+static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
+ sljit_s32 mem, sljit_sw memw,
+ sljit_s32 is_32bit, const sljit_ins* forms)
{
struct addr addr;
- sljit_ins ins;
- SLJIT_ASSERT(src & SLJIT_MEM);
+ SLJIT_ASSERT(mem & SLJIT_MEM);
- if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
- FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
- return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
+ if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
+ FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
+ return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
- FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+ FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
+ return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+}
- ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
- return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+static const sljit_ins load_forms[3] = {
+ 0x58000000 /* l */,
+ 0xe30000000058 /* ly */,
+ 0xe30000000004 /* lg */
+};
+
+static const sljit_ins store_forms[3] = {
+ 0x50000000 /* st */,
+ 0xe30000000050 /* sty */,
+ 0xe30000000024 /* stg */
+};
+
+static const sljit_ins load_halfword_forms[3] = {
+ 0x48000000 /* lh */,
+ 0xe30000000078 /* lhy */,
+ 0xe30000000015 /* lgh */
+};
+
+/* May clobber tmp1. */
+static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
+ sljit_s32 src, sljit_sw srcw,
+ sljit_s32 is_32bit)
+{
+ return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
}
/* May clobber tmp1. */
@@ -1032,24 +1046,11 @@ static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr d
}
/* May clobber tmp1. */
-static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
+static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 is_32bit)
{
- struct addr addr;
- sljit_ins ins;
-
- SLJIT_ASSERT(dst & SLJIT_MEM);
-
- if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
- FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
- return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
- }
-
- FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
-
- ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
- return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+ return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
}
#undef WHEN
@@ -1058,15 +1059,17 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler,
sljit_gpr dst_r,
sljit_s32 src, sljit_sw srcw)
{
+ sljit_gpr src_r;
+
SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
return push_load_imm_inst(compiler, dst_r, srcw);
if (src & SLJIT_MEM)
return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
- sljit_gpr src_r = gpr(src & REG_MASK);
+ src_r = gpr(src & REG_MASK);
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
}
@@ -1259,10 +1262,10 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_sw srcw)
{
- SLJIT_ASSERT(dst & SLJIT_MEM);
-
sljit_gpr dst_r = tmp1;
+ SLJIT_ASSERT(dst & SLJIT_MEM);
+
if (dst & OFFS_REG_MASK) {
sljit_gpr index = tmp1;
@@ -1567,6 +1570,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
if (jump && jump->addr == j) {
sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
+ sljit_ins op, arg;
+
jump->addr = (sljit_uw)pool_ptr;
/* load address into tmp1 */
@@ -1583,8 +1588,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
*(pool_ptr++) = (sljit_uw)target;
/* branch to tmp1 */
- sljit_ins op = (ins >> 32) & 0xf;
- sljit_ins arg = (ins >> 36) & 0xf;
+ op = (ins >> 32) & 0xf;
+ arg = (ins >> 36) & 0xf;
switch (op) {
case 4: /* brcl -> bcr */
ins = bcr(arg, tmp1);
@@ -1638,6 +1643,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = ins_size;
+ if (pool_size)
+ compiler->executable_size += (pad_size + pool_size);
code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
SLJIT_CACHE_FLUSH(code, code_ptr);
@@ -1650,12 +1657,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
/* TODO(mundaym): implement all */
switch (feature_type) {
case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+
case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_SIMD:
+ case SLJIT_HAS_ATOMIC:
return 1;
+
case SLJIT_HAS_CTZ:
return 2;
+
case SLJIT_HAS_CMOV:
return have_lscond1() ? 1 : 0;
}
@@ -1664,7 +1684,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
+ SLJIT_UNUSED_ARG(type);
+ return 0;
}
/* --------------------------------------------------------------------- */
@@ -1741,7 +1762,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
compiler->local_size = local_size;
- FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
+ if (is_s20(-local_size))
+ FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
+ else
+ FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
if (options & SLJIT_ENTER_REG_ARG)
return SLJIT_SUCCESS;
@@ -1786,8 +1810,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
if (is_u12(local_size))
FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
- else
+ else if (is_s20(local_size))
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
+ else
+ FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));
offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
@@ -2011,12 +2037,85 @@ static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 o
return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
+static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ struct addr addr;
+ sljit_gpr reg;
+ sljit_ins ins;
+ sljit_s32 opcode = GET_OPCODE(op);
+ sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
+
+ if (dst & SLJIT_MEM) {
+ if (src & SLJIT_MEM) {
+ FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
+ reg = tmp0;
+ } else
+ reg = gpr(src);
+
+ FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
+
+ if (is_16bit)
+ ins = 0xe3000000003f /* strvh */;
+ else
+ ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
+
+ return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+ }
+
+ reg = gpr(dst);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+
+ if (is_16bit)
+ ins = 0xe3000000001f /* lrvh */;
+ else
+ ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
+
+ FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
+
+ if (opcode == SLJIT_REV)
+ return SLJIT_SUCCESS;
+
+ if (is_16bit) {
+ if (op & SLJIT_32)
+ ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
+ else
+ ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
+ } else
+ ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
+
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+ }
+
+ ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
+ FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
+
+ if (opcode == SLJIT_REV)
+ return SLJIT_SUCCESS;
+
+ if (!is_16bit) {
+ ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+ }
+
+ if (op & SLJIT_32) {
+ ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
+ return push_inst(compiler, ins | R20A(reg) | 16);
+ }
+
+ ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
+ return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
+}
+
/* LEVAL will be defined later with different parameters as needed */
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
{
sljit_ins ins;
struct addr mem;
@@ -2087,7 +2186,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
/* LOAD IMMEDIATE */
- if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
+ if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
switch (opcode) {
case SLJIT_MOV_U8:
srcw = (sljit_sw)((sljit_u8)(srcw));
@@ -2166,14 +2265,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
/* STORE and STORE IMMEDIATE */
- if ((dst & SLJIT_MEM)
- && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
+ if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
+ struct addr mem;
sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
- if (src & SLJIT_IMM) {
+
+ if (src == SLJIT_IMM) {
/* TODO(mundaym): MOVE IMMEDIATE? */
FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
}
- struct addr mem;
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
switch (opcode) {
case SLJIT_MOV_U8:
@@ -2240,39 +2339,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
SLJIT_UNREACHABLE();
}
- SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
+ SLJIT_ASSERT(src != SLJIT_IMM);
- dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
- src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+ src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
/* TODO(mundaym): optimize loads and stores */
switch (opcode) {
- case SLJIT_NOT:
- if (src & SLJIT_MEM)
- FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));
-
- /* emulate ~x with x^-1 */
- if (!(op & SLJIT_32)) {
- FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
- if (src_r != dst_r)
- FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
-
- FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
- break;
- }
-
- if (have_eimm())
- FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
- else {
- FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
- if (src_r != dst_r)
- FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
-
- FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
- }
- break;
case SLJIT_CLZ:
case SLJIT_CTZ:
if (src & SLJIT_MEM)
@@ -2280,13 +2355,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
break;
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ op |= SLJIT_32;
+ /* fallthrough */
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
default:
SLJIT_UNREACHABLE();
}
- if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
- FAIL_IF(update_zero_overflow(compiler, op, dst_r));
-
if (dst & SLJIT_MEM)
return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
@@ -2337,7 +2417,7 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
const struct ins_forms *forms;
sljit_ins ins;
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
if (sets_overflow)
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
@@ -2422,9 +2502,8 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
- if (src2 & SLJIT_IMM) {
- if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
- {
+ if (src2 == SLJIT_IMM) {
+ if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
if ((op & SLJIT_32) || is_s32(src2w)) {
ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
@@ -2465,7 +2544,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
goto done;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
sljit_sw neg_src2w = -src2w;
if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
@@ -2573,7 +2652,7 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32
return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (is_s16(src2w)) {
ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
@@ -2680,7 +2759,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o
sljit_s32 type = GET_OPCODE(op);
const struct ins_forms *forms;
- if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
+ if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
sljit_s32 count16 = 0;
sljit_uw imm = (sljit_uw)src2w;
@@ -2705,12 +2784,12 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
- return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
+ return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
if ((imm & 0x00000000ffff0000ull) != 0)
- return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
+ return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
if ((imm & 0x0000ffff00000000ull) != 0)
- return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
- return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
+ return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
+ return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
}
if (!(op & SLJIT_SET_Z))
@@ -2744,7 +2823,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2804,7 +2883,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2814,7 +2893,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
}
if (GET_OPCODE(op) == SLJIT_ROTR) {
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
base_r = tmp1;
@@ -2822,7 +2901,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
src2w = -src2w;
}
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
@@ -2863,7 +2942,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->mode = op & SLJIT_32;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
- if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
+ if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
src1 ^= src2;
src2 ^= src1;
src1 ^= src2;
@@ -2931,122 +3010,125 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
sljit_s32 is_right;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
- sljit_gpr src_dst_r = gpr(src_dst);
- sljit_gpr src1_r = tmp0;
- sljit_gpr src2_r = tmp1;
+ sljit_gpr dst_r = gpr(dst_reg);
+ sljit_gpr src1_r = gpr(src1_reg);
+ sljit_gpr src2_r = gpr(src2_reg);
+ sljit_gpr src3_r = tmp1;
sljit_ins ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src1 & SLJIT_MEM)
- FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
- else if (src1 & SLJIT_IMM)
- FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
- else
- src1_r = gpr(src1);
-
- if (src2 & SLJIT_IMM) {
- src2w &= bit_length - 1;
+ if (src3 == SLJIT_IMM) {
+ src3w &= bit_length - 1;
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (!(src2 & SLJIT_MEM))
- src2_r = gpr(src2);
- else
- FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));
- if (src2 & SLJIT_IMM) {
if (op & SLJIT_32) {
- ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
- FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
+ if (dst_r == src1_r) {
+ ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
+ FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
+ } else {
+ ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
+ }
} else {
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
- FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
}
ins = 0xec0000000055 /* risbg */;
if (is_right) {
- src2w = bit_length - src2w;
- ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
+ src3w = bit_length - src3w;
+ ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
} else
- ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);
+ ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
- return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
+ return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
}
+ if (!(src3 & SLJIT_MEM)) {
+ src3_r = gpr(src3);
+
+ if (dst_r == src3_r) {
+ FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
+ src3_r = tmp1;
+ }
+ } else
+ FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
+
if (op & SLJIT_32) {
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
- if (src2_r != tmp1) {
- FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
- src2_r = tmp1;
+ if (src3_r != tmp1) {
+ FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
+ src3_r = tmp1;
} else
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
}
- ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
- FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));
+ if (dst_r == src1_r) {
+ ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
+ FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
+ } else {
+ ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
+ }
- if (src2_r != tmp1) {
+ if (src3_r != tmp1) {
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
- FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
+ FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
} else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
- if (src1_r == tmp0) {
- ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
- FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
- } else {
- ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
- }
+ ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
- return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
+ return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
}
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
- FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
- if (src2_r != tmp1)
+ if (src3_r != tmp1)
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
- src1_r = tmp0;
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
+ src2_r = tmp0;
- if (src2_r != tmp1)
- FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
+ if (src3_r != tmp1)
+ FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
} else
- FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));
+ FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
- return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
+ return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
- struct sljit_compiler *compiler,
- sljit_s32 op, sljit_s32 src, sljit_sw srcw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
{
sljit_gpr src_r;
struct addr addr;
@@ -3077,16 +3159,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return (sljit_s32)gpr(reg);
+ sljit_gpr dst_r = link_r;
+ sljit_s32 size;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, lgr(gpr(dst), link_r));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+
+ size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
+ FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return store_word(compiler, dst_r, dst, dstw, 0);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return (sljit_s32)fgpr(reg);
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return (sljit_s32)gpr(reg);
+
+ if (type != SLJIT_FLOAT_REGISTER)
+ return -1;
+
+ return (sljit_s32)freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -3177,33 +3289,61 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- sljit_ins ins;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
src = (sljit_s32)tmp0;
}
else if (src & SLJIT_MEM) {
- FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
+ FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
src = (sljit_s32)tmp0;
}
+ FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
+
+ if (dst & SLJIT_MEM)
+ return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
+
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
+
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ srcw = (sljit_s32)srcw;
+
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
else
ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
- FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
+}
- if (dst & SLJIT_MEM)
- return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
- return SLJIT_SUCCESS;
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
+ srcw = (sljit_u32)srcw;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
+ ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
+ else
+ ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
@@ -3355,21 +3495,91 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+ sljit_s32 reg;
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
+ src2 = TMP_FREG1;
+ }
+
+ if (src1 & SLJIT_MEM) {
+ reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
+ src1 = reg;
+ }
+
+ return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
{
+ union {
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ union {
+ sljit_sw imm;
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
- if (FAST_IS_REG(dst))
- return push_inst(compiler, lgr(gpr(dst), link_r));
+ u.value = value;
- /* memory */
- return store_word(compiler, link_r, dst, dstw, 0);
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
+}
+
+/* Copies raw bits between the float register freg and the general register
+ * reg. SLJIT_COPY_TO_F64 moves reg into freg (ldgr); any other opcode moves
+ * freg into reg (lgdr). With SLJIT_32 the value is shifted by 32 bits on the
+ * general register side, i.e. it occupies the upper half of the 64 bit
+ * pattern held by the float register. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_gpr gen_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ gen_r = gpr(reg);
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
+ if (op & SLJIT_32) {
+ /* Shift into tmp0 so that reg itself is left unmodified. */
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
+ gen_r = tmp0;
+ }
+
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
+ }
+
+ /* Float to general register direction. */
+ FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
+
+ if (!(op & SLJIT_32))
+ return SLJIT_SUCCESS;
+
+ /* 32 bit case: bring the upper half down with a logical shift. */
+ return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
}
/* --------------------------------------------------------------------- */
@@ -3394,14 +3604,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
+ struct sljit_jump *jump;
sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_jump(compiler, type));
/* record jump */
- struct sljit_jump *jump = (struct sljit_jump *)
- ensure_abuf(compiler, sizeof(struct sljit_jump));
+ jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
jump->addr = compiler->size;
@@ -3439,7 +3649,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
}
@@ -3459,6 +3669,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -3490,13 +3702,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
{
+ sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
+ sljit_gpr loc_r = tmp1;
sljit_u8 mask = get_cc(compiler, type);
CHECK_ERROR();
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
- sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
- sljit_gpr loc_r = tmp1;
switch (GET_OPCODE(op)) {
case SLJIT_AND:
case SLJIT_OR:
@@ -3556,37 +3768,125 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
- sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
+ sljit_ins mask;
sljit_gpr src_r;
+ sljit_gpr dst_r = gpr(dst_reg);
sljit_ins ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
- if (type & SLJIT_32)
- srcw = (sljit_s32)srcw;
+ ADJUST_LOCAL_OFFSET(src1, src1w);
- if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
- ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
- return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
+ if (dst_reg != src2_reg) {
+ if (src1 == dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+ FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
+ }
}
- if (src & SLJIT_IMM) {
- FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
+ mask = get_cc(compiler, type & ~SLJIT_32);
+
+ if (src1 & SLJIT_MEM) {
+ if (src1 & OFFS_REG_MASK) {
+ src_r = gpr(OFFS_REG(src1));
+
+ if (src1w != 0) {
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
+ src_r = tmp1;
+ }
+
+ FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
+ src_r = tmp1;
+ src1w = 0;
+ } else if (!is_s20(src1w)) {
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
+
+ if (src1 & REG_MASK)
+ FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
+
+ src_r = tmp1;
+ src1w = 0;
+ } else
+ src_r = gpr(src1 & REG_MASK);
+
+ ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
+ return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
+ }
+
+ if (src1 == SLJIT_IMM) {
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w;
+
+ if (have_lscond2() && is_s16(src1w)) {
+ ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
+ return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
+ }
+
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
src_r = tmp0;
} else
- src_r = gpr(src);
+ src_r = gpr(src1);
+
+ ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
+ return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_ins ins;
+ struct sljit_label *label;
+ struct sljit_jump *jump;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
- if (have_lscond1()) {
- ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
- return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
+ FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
+ }
}
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+ SLJIT_SKIP_CHECKS(compiler);
+ jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
+ FAIL_IF(!jump);
+
+ if (!(src1 & SLJIT_MEM)) {
+ ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
+ FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
+ } else
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
+
+ SLJIT_SKIP_CHECKS(compiler);
+ label = sljit_emit_label(compiler);
+ FAIL_IF(!label);
+
+ sljit_set_label(jump, label);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3648,6 +3948,502 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
}
+/* Emits a whole-vector move between the vector register freg and srcdst,
+ * which is either another register or a memory operand.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is 2 or 3. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ /* The second element size field of type is reused as the alignment. */
+ sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ struct addr addr;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (!(srcdst & SLJIT_MEM)) {
+ /* Register to register: SLJIT_SIMD_STORE only swaps the operand roles. */
+ if (type & SLJIT_SIMD_STORE)
+ ins = F36(srcdst) | F32(freg);
+ else
+ ins = F36(freg) | F32(srcdst);
+
+ return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
+ }
+
+ /* Memory operand: tmp1 is handed to make_addr_bx (presumably as scratch
+ for address computation - confirm against make_addr_bx). */
+ FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+
+ /* Alignment values of 3 and of 4 or more are encoded at bit 12 of the
+ instruction; smaller values leave the field zero. NOTE(review): this
+ looks like the vl / vst alignment hint - confirm against the ISA manual. */
+ if (alignment >= 4)
+ ins |= 4 << 12;
+ else if (alignment == 3)
+ ins |= 3 << 12;
+
+ return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
+}
+
+/* Fills every lane of the vector register freg with the value of src, which
+ * may be a memory operand, an immediate, or a (float / general) register.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is at least 2. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. Any failure while emitting an instruction is
+ * propagated to the caller. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ struct addr addr;
+ sljit_gpr reg;
+ sljit_sw sign_ext;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ /* Memory source: a single load-and-replicate instruction. */
+ FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
+ return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
+ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ /* A float immediate is emitted as vgbm with a zero mask; srcw is not
+ consulted on this path. */
+ if (src == SLJIT_IMM)
+ return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
+
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
+ }
+
+ if (src == SLJIT_IMM) {
+ /* 0x10000 is a sentinel that cannot be produced by the 8 / 16 bit
+ sign extensions below: it means the immediate does not fit
+ into the 16 bit field of vrepi. */
+ sign_ext = 0x10000;
+
+ switch (elem_size) {
+ case 0:
+ srcw &= 0xff;
+ sign_ext = (sljit_s8)srcw;
+ break;
+ case 1:
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ break;
+ case 2:
+ if ((sljit_s32)srcw == (sljit_s16)srcw) {
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ } else
+ srcw &= 0xffffffff;
+ break;
+ default:
+ if (srcw == (sljit_s16)srcw) {
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ }
+ break;
+ }
+
+ if (sign_ext != 0x10000) {
+ /* All-zero and all-one patterns are produced by vgbm. */
+ if (sign_ext == 0 || sign_ext == -1)
+ return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
+ | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
+
+ return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
+ | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
+ }
+
+ /* Wide immediate: materialize it in tmp0 first. The result must be
+ checked like every other emit, otherwise an allocation failure
+ would be silently ignored. */
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
+ reg = tmp0;
+ } else
+ reg = gpr(src);
+
+ /* Insert the general register into a lane (no lane index bits are set),
+ then replicate that lane across the vector. */
+ FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
+}
+
+/* Moves a single lane (lane_index) between the vector register freg and
+ * srcdst, which may be a memory operand, an immediate, or a (float / general)
+ * register. SLJIT_SIMD_STORE selects the vector -> srcdst direction,
+ * SLJIT_SIMD_LANE_ZERO clears the rest of freg before a load, and
+ * SLJIT_SIMD_LANE_SIGNED sign extends a lane stored into a general register.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is at least 2. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. Any failure while emitting an instruction is
+ * propagated to the caller. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ struct addr addr;
+ sljit_gpr reg;
+ /* Stays zero unless a memory operand or a short immediate form is chosen. */
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (srcdst & SLJIT_MEM) {
+ FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+ }
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ /* vllez handles the load-and-zero case directly, but only for the
+ lane index computed here. */
+ if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
+ return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
+
+ /* The source is about to be cleared: keep a copy in TMP_FREG1. */
+ if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ switch (elem_size) {
+ case 0:
+ ins |= 0xe70000000000 /* vleb */;
+ break;
+ case 1:
+ ins |= 0xe70000000001 /* vleh */;
+ break;
+ case 2:
+ ins |= 0xe70000000003 /* vlef */;
+ break;
+ default:
+ ins |= 0xe70000000002 /* vleg */;
+ break;
+ }
+
+ /* Convert to vsteb - vsteg */
+ if (type & SLJIT_SIMD_STORE)
+ ins |= 0x8;
+
+ return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ /* Store to a float register: replicate the selected lane into srcdst. */
+ if (type & SLJIT_SIMD_STORE)
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
+
+ if (elem_size == 3) {
+ /* 64 bit lanes: vpdi combines srcdst with the lane of freg
+ that is not being replaced. */
+ if (lane_index == 0)
+ ins = F32(srcdst) | F28(freg) | (1 << 12);
+ else
+ ins = F32(freg) | F28(srcdst);
+
+ return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
+ }
+
+ /* 32 bit lanes: bounce the value through tmp0. */
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
+ return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ /* Immediates that fit into 16 bits use the vleib - vleig forms;
+ otherwise ins stays zero and the value is loaded into tmp0. */
+ switch (elem_size) {
+ case 0:
+ ins = 0xe70000000040 /* vleib */;
+ srcdstw &= 0xff;
+ break;
+ case 1:
+ ins = 0xe70000000041 /* vleih */;
+ srcdstw &= 0xffff;
+ break;
+ case 2:
+ if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
+ srcdstw &= 0xffff;
+ ins = 0xe70000000043 /* vleif */;
+ } else
+ srcdstw &= 0xffffffff;
+ break;
+ default:
+ if (srcdstw == (sljit_s16)srcdstw) {
+ srcdstw &= 0xffff;
+ ins = 0xe70000000042 /* vleig */;
+ }
+ break;
+ }
+
+ if (ins != 0)
+ return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
+
+ /* The result must be checked like every other emit, otherwise an
+ allocation failure would be silently ignored. */
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, srcdstw));
+ reg = tmp0;
+ } else
+ reg = gpr(srcdst);
+
+ ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
+
+ if (!(type & SLJIT_SIMD_STORE))
+ return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
+
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
+
+ /* Sign extension is only emitted for lanes narrower than 64 bits. */
+ if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ ins = 0xb9060000 /* lgbr */;
+ break;
+ case 1:
+ ins = 0xb9070000 /* lghr */;
+ break;
+ default:
+ ins = 0xb9140000 /* lgfr */;
+ break;
+ }
+
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+}
+
+/* Fills every lane of the vector register freg with lane src_lane_index of
+ * the vector register src, using a single vrep instruction.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is at least 2. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ /* Lane index goes to bit 16, element size to bit 12 of the instruction. */
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
+ | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
+}
+
+/* Widens elements of src (a vector register or a memory operand) from the
+ * element size field of type to its second element size field, leaving the
+ * result in the vector register freg.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is at least 2. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ struct addr addr;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ /* Memory source: first load the data into freg itself. The element
+ load is selected by the distance between the two size fields. */
+ FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+
+ switch (elem2_size - elem_size) {
+ case 1:
+ ins |= 0xe70000000002 /* vleg */;
+ break;
+ case 2:
+ ins |= 0xe70000000003 /* vlef */;
+ break;
+ default:
+ ins |= 0xe70000000001 /* vleh */;
+ break;
+ }
+
+ FAIL_IF(push_inst(compiler, ins));
+ src = freg;
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ /* Fixed three instruction sequence; the element size fields are
+ hard-coded, so elem_size / elem2_size are not consulted here.
+ NOTE(review): presumably the f32 -> f64 conversion - confirm. */
+ FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
+ FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
+ return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
+ }
+
+ ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
+
+ /* One unpack per size step. The first iteration reads src, every later
+ one reads the partial result in freg. At least one unpack is emitted. */
+ do {
+ FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
+ src = freg;
+ } while (++elem_size < elem2_size);
+
+ return SLJIT_SUCCESS;
+}
+
+/* Gathers bits selected from the vector register freg into dst (a general
+ * register or a memory operand): a bit index table chosen by the element
+ * size is loaded into TMP_FREG1, vbperm picks the bits, and vlgv moves the
+ * result into a general register. NOTE(review): the tables look like the
+ * positions of the per-lane sign bits - confirm against the ISA manual.
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4
+ * and, for SLJIT_SIMD_FLOAT, the element size field is at least 2. When
+ * SLJIT_SIMD_TEST is set nothing is emitted and SLJIT_SUCCESS is returned
+ * once those checks pass. Any failure while emitting an instruction is
+ * propagated to the caller. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_gpr dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ /* Every immediate load is checked: ignoring its result would hide an
+ allocation failure and continue emitting code. */
+ switch (elem_size) {
+ case 0:
+ /* Byte elements need a full 128 bit table built from two registers. */
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078));
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038));
+ FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
+ break;
+ case 1:
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070));
+ break;
+ case 2:
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060));
+ break;
+ default:
+ FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040));
+ break;
+ }
+
+ /* For wider elements the 64 bit table in tmp0 is inserted into lane 1. */
+ if (elem_size != 0)
+ FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
+
+ FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
+
+ /* A memory destination is served through tmp0 and stored afterwards. */
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
+ | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
+
+ if (dst_r == tmp0)
+ return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
+
+ return SLJIT_SUCCESS;
+}
+
+/* Emits a bitwise vector operation: dst_freg = src1_freg OP src2_freg, where
+ * OP is selected by the opcode field of type (AND -> vn, OR -> vo,
+ * XOR -> vx).
+ * Returns SLJIT_ERR_UNSUPPORTED unless the register size field of type is 4,
+ * the element size field is 2 or 3 for SLJIT_SIMD_FLOAT, and the opcode is
+ * one of the three listed above. When SLJIT_SIMD_TEST is set nothing is
+ * emitted and SLJIT_SUCCESS is returned once those checks pass. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) {
+ case SLJIT_SIMD_OP2_AND:
+ ins = 0xe70000000068 /* vn */;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ ins = 0xe7000000006a /* vo */;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ ins = 0xe7000000006d /* vx */;
+ break;
+ default:
+ /* Never emit an instruction word without an opcode. */
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+
+ /* Single test-mode exit, placed after the opcode has been validated so
+ that a query for an unknown operation is not reported as supported. */
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
+}
+
+/* Emits the load half of an atomic sequence. It is forwarded unchanged to
+ * sljit_emit_op1, which loads from the address held in mem_reg (offset 0)
+ * into dst_reg using the move opcode passed in op. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+ /* Arguments were already validated above; skip the checks in op1. */
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
+}
+
+/* Emits the store half of an atomic sequence as a compare and swap on the
+ * address held in mem_reg, with src_reg as the new value and temp_reg as the
+ * other register operand of the instruction.
+ * SLJIT_MOV32 / SLJIT_MOV_U32 emit a single cs, SLJIT_MOV_U8 / SLJIT_MOV_U16
+ * emit the sub-word sequence below, and every other opcode emits a single
+ * csg. The sub-word sequence clobbers tmp0, tmp1 and temp_reg. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ sljit_ins mask;
+ sljit_gpr tmp_r = gpr(temp_reg);
+ sljit_gpr mem_r = gpr(mem_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
+ case SLJIT_MOV_U8:
+ mask = 0xff;
+ break;
+ case SLJIT_MOV_U16:
+ mask = 0xffff;
+ break;
+ default:
+ return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
+ }
+
+ /* Sub-word case: the value is merged into the enclosing aligned 32 bit
+ word, which is then updated with a 32 bit cs. */
+
+ /* tmp0 = (src_reg ^ tmp_r) & mask */
+ FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
+ FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
+ /* tmp_r has been consumed by xgrk above, so it can be reused: it now
+ receives the 0xfffc immediate that is ANDed with mem_r further down. */
+ FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
+ FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));
+
+ /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
+ FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
+ FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
+ FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));
+
+ /* Already computed: tmp_r = mem_r & ~0x3 */
+
+ /* Load the current aligned word into tmp1, fold the shifted difference
+ into it (giving the new word in tmp0) and swap it in with cs. */
+ FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
+ FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1)));
+ return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
+}
+
/* --------------------------------------------------------------------- */
/* Other instructions */
/* --------------------------------------------------------------------- */
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
index 08da03026d..ba4a1ebbc2 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
@@ -62,21 +62,19 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
- SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
- && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
- && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+ SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
+ SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+ SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);
size &= 0xf;
- inst_size = size;
+ /* The mod r/m byte is always present. */
+ inst_size = size + 1;
- if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
- inst_size++;
- if (flags & EX86_PREF_66)
+ if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
inst_size++;
/* Calculate size of b. */
- inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & REG_MASK))
inst_size += sizeof(sljit_sw);
@@ -87,8 +85,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_sw);
- }
- else if (reg_map[b & REG_MASK] == 5) {
+ } else if (reg_map[b & REG_MASK] == 5) {
/* Swap registers if possible. */
if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
@@ -105,15 +102,14 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
}
/* Calculate size of a. */
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
- }
- else if (flags & EX86_SHIFT_INS) {
+ } else if (flags & EX86_SHIFT_INS) {
SLJIT_ASSERT(imma <= 0x1f);
if (imma != 1) {
inst_size++;
@@ -125,8 +121,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_sw);
- }
- else
+ } else
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
@@ -136,27 +131,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
- if (flags & EX86_PREF_F3)
+ else if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
- if (flags & EX86_PREF_66)
+ else if (flags & EX86_PREF_66)
*inst++ = 0x66;
buf_ptr = inst + size;
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
- if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+ if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if (a & SLJIT_IMM)
+ if (a == SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = U8(reg_map[a] << 3);
else
- *buf_ptr = U8(a << 3);
- }
- else {
- if (a & SLJIT_IMM) {
+ *buf_ptr = U8(freg_map[a] << 3);
+ } else {
+ if (a == SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
@@ -167,7 +161,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
}
if (!(b & SLJIT_MEM)) {
- *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
+ *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : freg_map[b]));
buf_ptr++;
} else if (b & REG_MASK) {
reg_map_b = reg_map[b & REG_MASK];
@@ -183,8 +177,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
if (!(b & OFFS_REG_MASK))
*buf_ptr++ |= reg_map_b;
else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
+ buf_ptr += 2;
}
if (immb != 0 || reg_map_b == 5) {
@@ -195,25 +190,24 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
buf_ptr += sizeof(sljit_sw);
}
}
- }
- else {
+ } else {
if (reg_map_b == 5)
*buf_ptr |= 0x40;
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr += 2;
if (reg_map_b == 5)
*buf_ptr++ = 0;
}
- }
- else {
+ } else {
*buf_ptr++ |= 0x05;
sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_sw);
}
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = U8(imma);
else if (flags & EX86_HALF_ARG)
@@ -222,7 +216,67 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
sljit_unaligned_store_sw(buf_ptr, imma);
}
- return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+ return inst;
+}
+
+static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
+ /* The first and second register operand. */
+ sljit_s32 a, sljit_s32 v,
+ /* The general operand (not immediate). */
+ sljit_s32 b, sljit_sw immb)
+{
+ sljit_u8 *inst;
+ sljit_u8 vex = 0;
+ sljit_u8 vex_m = 0;
+ sljit_uw size;
+
+ SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+
+ if (op & VEX_OP_0F38)
+ vex_m = 0x2;
+ else if (op & VEX_OP_0F3A)
+ vex_m = 0x3;
+
+ if (op & VEX_W) {
+ if (vex_m == 0)
+ vex_m = 0x1;
+
+ vex |= 0x80;
+ }
+
+ if (op & EX86_PREF_66)
+ vex |= 0x1;
+ else if (op & EX86_PREF_F2)
+ vex |= 0x3;
+ else if (op & EX86_PREF_F3)
+ vex |= 0x2;
+
+ op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);
+
+ if (op & VEX_256)
+ vex |= 0x4;
+
+ vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));
+
+ size = op & ~(sljit_uw)0xff;
+ size |= (vex_m == 0) ? 3 : 4;
+
+ inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
+ FAIL_IF(!inst);
+
+ if (vex_m == 0) {
+ inst[0] = 0xc5;
+ inst[1] = U8(vex | 0x80);
+ inst[2] = U8(op);
+ return SLJIT_SUCCESS;
+ }
+
+ inst[0] = 0xc4;
+ inst[1] = U8(vex_m | 0xe0);
+ inst[2] = vex;
+ inst[3] = U8(op);
+ return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
@@ -578,8 +632,6 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
- sljit_u8 *inst;
-
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
@@ -588,11 +640,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
FAIL_IF(emit_stack_frame_release(compiler, 0));
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- RET();
- return SLJIT_SUCCESS;
+ return emit_byte(compiler, RET_near);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
@@ -782,7 +830,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
offset = stack_size + compiler->local_size;
- if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (src != SLJIT_IMM && src != SLJIT_R0) {
if (word_arg_count >= 1) {
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
r2_offset = sizeof(sljit_sw);
@@ -836,7 +884,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
stack_size = args_size + SSIZE_OF(sw);
- if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) {
r2_offset = SSIZE_OF(sw);
stack_size += SSIZE_OF(sw);
}
@@ -865,7 +913,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
}
- if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (src != SLJIT_IMM && src != SLJIT_R0) {
if (word_arg_count >= 1) {
SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
@@ -952,13 +1000,7 @@ static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 e
sljit_u8 *inst;
BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
-
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- RET();
-
- return SLJIT_SUCCESS;
+ return emit_byte(compiler, RET_near);
}
static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
@@ -1075,7 +1117,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
stack_size = type;
FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
src = SLJIT_R0;
srcw = 0;
}
@@ -1142,30 +1184,20 @@ static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *com
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
sljit_u8 *inst;
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
-
CHECK_EXTRA_REGS(dst, dstw, (void)0);
- if (FAST_IS_REG(dst)) {
- /* Unused dest is possible here. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
-
- INC_SIZE(1);
- POP_REG(reg_map[dst]);
- return SLJIT_SUCCESS;
- }
+ /* Unused dest is possible here. */
+ if (FAST_IS_REG(dst))
+ return emit_byte(compiler, U8(POP_r + reg_map[dst]));
/* Memory. */
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
FAIL_IF(!inst);
- *inst++ = POP_rm;
+ *inst = POP_rm;
return SLJIT_SUCCESS;
}
@@ -1185,8 +1217,8 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
else {
inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_FF;
- *inst |= PUSH_rm;
+ inst[0] = GROUP_FF;
+ inst[1] |= PUSH_rm;
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
@@ -1197,6 +1229,22 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
return SLJIT_SUCCESS;
}
+static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 options = compiler->options;
+ sljit_s32 saveds = compiler->saveds;
+ sljit_s32 scratches = compiler->scratches;
+
+ saveds = ((scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
+
+ /* Saving ebp. */
+ if (!(options & SLJIT_ENTER_REG_ARG))
+ saveds += SSIZE_OF(sw);
+
+ return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saveds);
+}
+
/* --------------------------------------------------------------------- */
/* Other operations */
/* --------------------------------------------------------------------- */
@@ -1279,6 +1327,283 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+ sljit_u8 *inst, *jump_inst1, *jump_inst2;
+ sljit_uw size1, size2;
+
+ /* Binary representation of 0x80000000. */
+ static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul;
+
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+ if (!(op & SLJIT_32)) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ inst[1] |= ROL;
+
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ inst[1] |= SHR;
+
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10);
+
+ size1 = compiler->size;
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));
+
+ inst[1] = U8(compiler->size - size1);
+
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+ }
+
+ if (!FAST_IS_REG(src)) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+ src = TMP_REG1;
+ }
+
+ BINARY_IMM32(CMP, 0, src, 0);
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = JL_i8;
+ jump_inst1 = inst;
+
+ size1 = compiler->size;
+
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = JMP_i8;
+ jump_inst2 = inst;
+
+ size2 = compiler->size;
+
+ jump_inst1[1] = U8(size2 - size1);
+
+ if (src != TMP_REG1)
+ EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ inst[1] |= SHR;
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = JNC_i8;
+ jump_inst1 = inst;
+
+ size1 = compiler->size;
+
+ BINARY_IMM32(OR, 1, TMP_REG1, 0);
+ jump_inst1[1] = U8(compiler->size - size1);
+
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));
+
+ jump_inst2[1] = U8(compiler->size - size2);
+
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+ sljit_u8 *inst;
+ union {
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm != 0)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!inst);
+ INC_SIZE(4);
+
+ inst[0] = GROUP_66;
+ inst[1] = GROUP_0F;
+
+ if (u.imm == 0) {
+ inst[2] = PXOR_x_xm;
+ inst[3] = U8(freg | (freg << 3) | MOD_REG);
+ } else {
+ inst[2] = MOVD_x_rm;
+ inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG);
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ sljit_u8 *inst;
+ sljit_s32 tmp_freg = freg;
+ union {
+ sljit_s32 imm[2];
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm[0] == 0) {
+ if (u.imm[1] == 0)
+ return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
+ } else
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);
+
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));
+
+ if (u.imm[1] == 0)
+ return SLJIT_SUCCESS;
+
+ if (u.imm[0] == 0) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!inst);
+ INC_SIZE(4);
+
+ inst[0] = GROUP_0F;
+ inst[1] = SHUFPS_x_xm;
+ inst[2] = U8(MOD_REG | (freg << 3) | freg);
+ inst[3] = 0x51;
+ return SLJIT_SUCCESS;
+ }
+
+ if (u.imm[0] != u.imm[1]) {
+ SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0);
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
+
+ if (cpu_feature_list & CPU_FEATURE_SSE41) {
+ FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
+ return emit_byte(compiler, 1);
+ }
+
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0));
+ tmp_freg = TMP_FREG;
+ }
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(3);
+
+ inst[0] = GROUP_0F;
+ inst[1] = UNPCKLPS_x_xm;
+ inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg);
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_u8 *inst;
+ sljit_s32 reg2;
+ sljit_sw regw, reg2w;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ regw = 0;
+ reg2 = 0;
+ reg2w = 0;
+
+ SLJIT_ASSERT(cpu_feature_list != 0);
+
+ if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) {
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_FIRST(reg);
+ reg = REG_PAIR_SECOND(reg);
+
+ CHECK_EXTRA_REGS(reg, regw, (void)0);
+
+ FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
+ | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw));
+ } else
+ reg2 = reg;
+
+ CHECK_EXTRA_REGS(reg2, reg2w, (void)0);
+
+ FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8)
+ | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w));
+ return emit_byte(compiler, 1);
+ }
+
+ if (reg & REG_PAIR_MASK) {
+ reg2 = REG_PAIR_SECOND(reg);
+ reg = REG_PAIR_FIRST(reg);
+
+ if (reg == reg2)
+ reg = 0;
+
+ CHECK_EXTRA_REGS(reg2, reg2w, (void)0);
+ }
+
+ CHECK_EXTRA_REGS(reg, regw, (void)0);
+
+ if (op & SLJIT_32)
+ return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
+ | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw);
+
+ if (op == SLJIT_COPY_FROM_F64) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
+ FAIL_IF(!inst);
+ INC_SIZE(5);
+
+ inst[0] = GROUP_66;
+ inst[1] = GROUP_0F;
+ inst[2] = PSHUFD_x_xm;
+ inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg);
+ inst[4] = 1;
+ } else if (reg != 0)
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
+
+ if (reg2 != 0)
+ FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
+ | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w));
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(3);
+
+ inst[0] = GROUP_0F;
+ inst[1] = UNPCKLPS_x_xm;
+ inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG));
+ } else
+ FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
+
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
sljit_sw size;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
index 4e938ffcf3..f313f3f038 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
@@ -37,9 +37,9 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
FAIL_IF(!inst);
INC_SIZE(2 + sizeof(sljit_sw));
- *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
- *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
- sljit_unaligned_store_sw(inst, imm);
+ inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
+ inst[1] = U8(MOV_r_i32 | reg_lmap[reg]);
+ sljit_unaligned_store_sw(inst + 2, imm);
return SLJIT_SUCCESS;
}
@@ -72,7 +72,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
sljit_uw inst_size;
/* The immediate operand must be 32 bit. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
+ SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));
/* Both cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
/* Size flags not allowed for typed instructions. */
@@ -80,26 +80,24 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
- SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
- && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
- && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+ SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
+ SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+ SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);
size &= 0xf;
- inst_size = size;
+ /* The mod r/m byte is always present. */
+ inst_size = size + 1;
if (!compiler->mode32 && !(flags & EX86_NO_REXW))
rex |= REX_W;
else if (flags & EX86_REX)
rex |= REX;
- if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
- inst_size++;
- if (flags & EX86_PREF_66)
+ if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
inst_size++;
/* Calculate size of b. */
- inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
@@ -119,8 +117,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_s32);
- }
- else if (reg_lmap[b & REG_MASK] == 5) {
+ } else if (reg_lmap[b & REG_MASK] == 5) {
/* Swap registers if possible. */
if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
@@ -140,23 +137,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
rex |= REX_X;
}
}
- }
- else if (!(flags & EX86_SSE2_OP2)) {
+ } else if (!(flags & EX86_SSE2_OP2)) {
if (reg_map[b] >= 8)
rex |= REX_B;
- }
- else if (freg_map[b] >= 8)
+ } else if (freg_map[b] >= 8)
rex |= REX_B;
- if (a & SLJIT_IMM) {
+ if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {
+ SLJIT_ASSERT(size == 2);
+ size++;
+ inst_size++;
+ }
+
+ if (a == SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
- }
- else if (flags & EX86_SHIFT_INS) {
+ } else if (flags & EX86_SHIFT_INS) {
SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
if (imma != 1) {
inst_size++;
@@ -168,8 +168,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_s32);
- }
- else {
+ } else {
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
if (!(flags & EX86_SSE2_OP1)) {
@@ -186,32 +185,34 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
PTR_FAIL_IF(!inst);
- /* Encoding the byte. */
+ /* Encoding prefixes. */
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
- if (flags & EX86_PREF_F3)
+ else if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
- if (flags & EX86_PREF_66)
+ else if (flags & EX86_PREF_66)
*inst++ = 0x66;
+
+ /* Rex is always the last prefix. */
if (rex)
*inst++ = rex;
+
buf_ptr = inst + size;
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
- if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+ if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if (a & SLJIT_IMM)
+ if (a == SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = U8(reg_lmap[a] << 3);
else
*buf_ptr = U8(freg_lmap[a] << 3);
- }
- else {
- if (a & SLJIT_IMM) {
+ } else {
+ if (a == SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
@@ -238,8 +239,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
if (!(b & OFFS_REG_MASK))
*buf_ptr++ |= reg_lmap_b;
else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
+ buf_ptr += 2;
}
if (immb != 0 || reg_lmap_b == 5) {
@@ -250,26 +252,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
buf_ptr += sizeof(sljit_s32);
}
}
- }
- else {
+ } else {
if (reg_lmap_b == 5)
*buf_ptr |= 0x40;
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr += 2;
if (reg_lmap_b == 5)
*buf_ptr++ = 0;
}
- }
- else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = 0x25;
+ } else {
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = 0x25;
+ buf_ptr += 2;
sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_s32);
}
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = U8(imma);
else if (flags & EX86_HALF_ARG)
@@ -278,7 +280,78 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
}
- return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+ return inst;
+}
+
+static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
+ /* The first and second register operand. */
+ sljit_s32 a, sljit_s32 v,
+ /* The general operand (not immediate). */
+ sljit_s32 b, sljit_sw immb)
+{
+ sljit_u8 *inst;
+ sljit_u8 vex = 0;
+ sljit_u8 vex_m = 0;
+ sljit_uw size;
+
+ SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+
+ op |= EX86_REX;
+
+ if (op & VEX_OP_0F38)
+ vex_m = 0x2;
+ else if (op & VEX_OP_0F3A)
+ vex_m = 0x3;
+
+ if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
+ if (vex_m == 0)
+ vex_m = 0x1;
+
+ vex |= 0x80;
+ }
+
+ if (op & EX86_PREF_66)
+ vex |= 0x1;
+ else if (op & EX86_PREF_F2)
+ vex |= 0x3;
+ else if (op & EX86_PREF_F3)
+ vex |= 0x2;
+
+ op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);
+
+ if (op & VEX_256)
+ vex |= 0x4;
+
+ vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));
+
+ size = op & ~(sljit_uw)0xff;
+ size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;
+
+ inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
+ FAIL_IF(!inst);
+
+ SLJIT_ASSERT((inst[-1] & 0xf0) == REX);
+
+ /* If X or B is present in REX prefix. */
+ if (vex_m == 0 && inst[-1] & 0x3)
+ vex_m = 0x1;
+
+ if (vex_m == 0) {
+ vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);
+
+ inst[-1] = 0xc5;
+ inst[0] = vex;
+ inst[1] = U8(op);
+ return SLJIT_SUCCESS;
+ }
+
+ vex_m |= U8((inst[-1] ^ 0x7) << 5);
+ inst[-1] = 0xc4;
+ inst[0] = vex_m;
+ inst[1] = vex;
+ inst[2] = U8(op);
+ return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
@@ -370,6 +443,12 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji
return code_ptr;
}
+#ifdef _WIN64
+typedef struct {
+ sljit_sw regs[2];
+} sljit_sse2_reg;
+#endif /* _WIN64 */
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@@ -423,7 +502,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifdef _WIN64
local_size += SLJIT_LOCALS_OFFSET;
- saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
+ saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);
if (saved_float_regs_size > 0) {
saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
@@ -532,16 +611,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
tmp = SLJIT_FS0 - fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- *inst++ = GROUP_0F;
- *inst = MOVAPS_xm_x;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- *inst++ = GROUP_0F;
- *inst = MOVAPS_xm_x;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
}
@@ -565,7 +640,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
#ifdef _WIN64
local_size += SLJIT_LOCALS_OFFSET;
- saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
+ saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);
if (saved_float_regs_size > 0)
local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
@@ -591,7 +666,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
#endif /* _WIN64 */
#ifdef _WIN64
- saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
+ saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);
if (saved_float_regs_offset > 0) {
compiler->mode32 = 1;
@@ -599,16 +674,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
tmp = SLJIT_FS0 - fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- *inst++ = GROUP_0F;
- *inst = MOVAPS_x_xm;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- *inst++ = GROUP_0F;
- *inst = MOVAPS_x_xm;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
@@ -656,20 +727,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
- sljit_u8 *inst;
-
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
compiler->mode32 = 0;
FAIL_IF(emit_stack_frame_release(compiler, 0));
-
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- RET();
- return SLJIT_SUCCESS;
+ return emit_byte(compiler, RET_near);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
@@ -863,22 +927,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
return sljit_emit_ijump(compiler, type, src, srcw);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
sljit_u8 *inst;
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
-
if (FAST_IS_REG(dst)) {
- if (reg_map[dst] < 8) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- POP_REG(reg_lmap[dst]);
- return SLJIT_SUCCESS;
- }
+ if (reg_map[dst] < 8)
+ return emit_byte(compiler, U8(POP_r + reg_lmap[dst]));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
@@ -892,7 +947,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
FAIL_IF(!inst);
- *inst++ = POP_rm;
+ *inst = POP_rm;
return SLJIT_SUCCESS;
}
@@ -922,8 +977,8 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_FF;
- *inst |= PUSH_rm;
+ inst[0] = GROUP_FF;
+ inst[1] |= PUSH_rm;
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
@@ -934,6 +989,16 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
return SLJIT_SUCCESS;
}
+static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 saved_regs_size;
+
+ compiler->mode32 = 0;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
+ return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size);
+}
+
/* --------------------------------------------------------------------- */
/* Other operations */
/* --------------------------------------------------------------------- */
@@ -1027,15 +1092,15 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
- if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
- inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
- FAIL_IF(!inst);
- *inst = MOV_rm_i32;
- return SLJIT_SUCCESS;
- }
- return emit_load_imm64(compiler, dst, srcw);
+ if (!sign || ((sljit_u32)srcw <= 0x7fffffff))
+ return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
+
+ inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
+ FAIL_IF(!inst);
+ *inst = MOV_rm_i32;
+ return SLJIT_SUCCESS;
}
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
@@ -1053,10 +1118,10 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
if (sign) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = MOVSXD_r_rm;
+ *inst = MOVSXD_r_rm;
} else {
compiler->mode32 = 1;
- FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
+ EMIT_MOV(compiler, dst_r, 0, src, srcw);
compiler->mode32 = 0;
}
}
@@ -1072,6 +1137,203 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
return SLJIT_SUCCESS;
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{ /* Emits code converting an unsigned integer source to floating point using CVTSI2SD; returns SLJIT_SUCCESS or the error propagated by FAIL_IF. */
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; /* Convert into dst when it is a register, otherwise into TMP_FREG and store it at the end. */
+ sljit_u8 *inst, *jump_inst1, *jump_inst2;
+ sljit_uw size1, size2;
+
+ compiler->mode32 = 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { /* U32 source: no branches are emitted on this path. */
+ if (src != SLJIT_IMM) {
+ compiler->mode32 = 1; /* The source is moved into TMP_REG1 with mode32 set (presumably a zero-extending 32-bit move -- confirm). */
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+ compiler->mode32 = 0;
+ } else
+ FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw)); /* Immediate source: MOV r, imm32 into TMP_REG1, with REX_B only when TMP_REG1 maps above 7. */
+
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); /* Emitted with mode32 == 0. */
+
+ compiler->mode32 = 1;
+
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+ }
+
+ if (!FAST_IS_REG(src)) { /* The remaining code needs the source in a register. */
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+ src = TMP_REG1;
+ }
+
+ BINARY_IMM32(CMP, 0, src, 0); /* Compare the source with 0 (mode32 is still 0 here). */
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = JL_i8; /* Short "less than" jump to the fix-up path below; its displacement byte is patched once size2 is known. */
+ jump_inst1 = inst;
+
+ size1 = compiler->size;
+
+ compiler->mode32 = 0;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); /* Fall-through path: convert the source directly. */
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = JMP_i8; /* Skip the fix-up path; displacement patched at the end. */
+ jump_inst2 = inst;
+
+ size2 = compiler->size;
+
+ jump_inst1[1] = U8(size2 - size1); /* First jump lands right after the JMP_i8 above. */
+
+ if (src != TMP_REG1)
+ EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+
+ EMIT_MOV(compiler, TMP_REG2, 0, src, 0); /* Second copy of the source, used to keep its lowest bit. */
+
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ inst[1] |= SHR; /* TMP_REG1 >>= 1 (logical shift). */
+
+ compiler->mode32 = 1;
+ BINARY_IMM32(AND, 1, TMP_REG2, 0); /* TMP_REG2 &= 1. */
+
+ compiler->mode32 = 0;
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0);
+ FAIL_IF(!inst);
+ inst[0] = OR_r_rm; /* TMP_REG1 |= TMP_REG2, so the bit shifted out is merged back into the halved value (presumably for correct rounding -- confirm). */
+
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); /* Convert the halved value. */
+ compiler->mode32 = 1;
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); /* Add the result to itself to undo the halving. */
+
+ jump_inst2[1] = U8(compiler->size - size2); /* Second jump lands after the fix-up path. */
+
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero)
+{ /* Emits either PXOR freg, freg (is_zero) or MOVD_x_rm freg, TMP_REG1, hand-encoded with a 0x66 prefix and an optional REX byte. */
+ sljit_u8 *inst;
+ sljit_u32 size;
+
+ if (is_zero) {
+ rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0; /* The caller's rex is discarded; both operands are freg, so both extension bits are needed together. */
+ } else {
+ if (freg_map[freg] >= 8)
+ rex |= REX_R;
+ if (reg_map[TMP_REG1] >= 8)
+ rex |= REX_B;
+ }
+
+ size = (rex != 0) ? 5 : 4; /* 0x66, optional REX, 0x0f, opcode, ModRM. */
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!inst);
+ INC_SIZE(size);
+
+ *inst++ = GROUP_66;
+ if (rex != 0)
+ *inst++ = rex;
+ inst[0] = GROUP_0F;
+
+ if (is_zero) {
+ inst[1] = PXOR_x_xm;
+ inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG); /* Register-direct ModRM with freg in both fields. */
+ } else {
+ inst[1] = MOVD_x_rm;
+ inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG); /* Register-direct ModRM: freg in the reg field, TMP_REG1 in the rm field. */
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{ /* Emits code that sets freg to the given single precision constant. */
+ union { /* Used to read the bit pattern of value as an integer. */
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm != 0) { /* Non-zero bit pattern: load it into TMP_REG1 first; an all-zero pattern needs no load. */
+ compiler->mode32 = 1;
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
+ }
+
+ return sljit_emit_fset(compiler, freg, 0, u.imm == 0); /* rex argument 0: REX_W is not requested for the 32 bit variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{ /* Emits code that sets freg to the given double precision constant. */
+ union { /* Used to read the bit pattern of value as an integer. */
+ sljit_sw imm;
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm != 0) { /* Non-zero bit pattern: load it into TMP_REG1 first; an all-zero pattern needs no load. */
+ compiler->mode32 = 0;
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
+ }
+
+ return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0); /* REX_W is passed as the starting rex value for the 64 bit variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{ /* Emits one hand-encoded MOVD_x_rm or MOVD_rm_x instruction between freg and reg; the direction is chosen from the opcode of op. */
+ sljit_u8 *inst;
+ sljit_u32 size;
+ sljit_u8 rex = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (!(op & SLJIT_32)) /* REX_W is set unless the SLJIT_32 flag is present in op. */
+ rex = REX_W;
+
+ if (freg_map[freg] >= 8)
+ rex |= REX_R;
+
+ if (reg_map[reg] >= 8)
+ rex |= REX_B;
+
+ size = (rex != 0) ? 5 : 4; /* 0x66, optional REX, 0x0f, opcode, ModRM. */
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!inst);
+ INC_SIZE(size);
+
+ *inst++ = GROUP_66;
+ if (rex != 0)
+ *inst++ = rex;
+ inst[0] = GROUP_0F;
+ inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x; /* Any opcode other than SLJIT_COPY_TO_F64 selects the MOVD_rm_x form. */
+ inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG); /* Register-direct ModRM: freg in the reg field, reg in the rm field. */
+
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
sljit_s32 tmp, size;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
index 651942be80..c2c0421349 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
@@ -24,6 +24,12 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
return "x86" SLJIT_CPUINFO;
@@ -61,15 +67,18 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
15 - R15
*/
-#define TMP_FREG (0)
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-/* Last register + 1. */
-#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
- 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
+ 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
+};
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 0
};
#define CHECK_EXTRA_REGS(p, w, do) \
@@ -81,12 +90,10 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
#else /* SLJIT_CONFIG_X86_32 */
-/* Last register + 1. */
-#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
- Note: avoid to use r12 and r13 for memory addessing
+ Note: avoid to use r12 and r13 for memory addressing
therefore r12 is better to be a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
@@ -95,7 +102,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
+ 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
@@ -109,12 +116,12 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
#endif
/* Args: xmm0-xmm3 */
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
- 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
+ 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
};
/* low-map. freg_map & 0x7. */
-static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
- 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
+ 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
};
#define REX_W 0x48
@@ -140,155 +147,237 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define U8(v) ((sljit_u8)(v))
-
/* Size flags for emit_x86_instruction: */
-#define EX86_BIN_INS 0x0010
-#define EX86_SHIFT_INS 0x0020
-#define EX86_REX 0x0040
-#define EX86_NO_REXW 0x0080
-#define EX86_BYTE_ARG 0x0100
-#define EX86_HALF_ARG 0x0200
-#define EX86_PREF_66 0x0400
-#define EX86_PREF_F2 0x0800
-#define EX86_PREF_F3 0x1000
-#define EX86_SSE2_OP1 0x2000
-#define EX86_SSE2_OP2 0x4000
+#define EX86_BIN_INS ((sljit_uw)0x000010)
+#define EX86_SHIFT_INS ((sljit_uw)0x000020)
+#define EX86_BYTE_ARG ((sljit_uw)0x000040)
+#define EX86_HALF_ARG ((sljit_uw)0x000080)
+/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
+#define EX86_REX ((sljit_uw)0x000100)
+#define EX86_NO_REXW ((sljit_uw)0x000200)
+#define EX86_PREF_66 ((sljit_uw)0x000400)
+#define EX86_PREF_F2 ((sljit_uw)0x000800)
+#define EX86_PREF_F3 ((sljit_uw)0x001000)
+#define EX86_SSE2_OP1 ((sljit_uw)0x002000)
+#define EX86_SSE2_OP2 ((sljit_uw)0x004000)
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
+#define EX86_VEX_EXT ((sljit_uw)0x008000)
+/* Op flags for emit_vex_instruction: */
+#define VEX_OP_0F38 ((sljit_uw)0x010000)
+#define VEX_OP_0F3A ((sljit_uw)0x020000)
+#define VEX_SSE2_OPV ((sljit_uw)0x040000)
+#define VEX_AUTO_W ((sljit_uw)0x080000)
+#define VEX_W ((sljit_uw)0x100000)
+#define VEX_256 ((sljit_uw)0x200000)
+
+#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66) /* No prefix flag when SLJIT_32 is set in op, EX86_PREF_66 otherwise. */
+#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) /* EX86_PREF_F3 when SLJIT_32 is set in op, EX86_PREF_F2 otherwise. */
/* --------------------------------------------------------------------- */
-/* Instrucion forms */
+/* Instruction forms */
/* --------------------------------------------------------------------- */
-#define ADD (/* BINARY */ 0 << 3)
-#define ADD_EAX_i32 0x05
-#define ADD_r_rm 0x03
-#define ADD_rm_r 0x01
-#define ADDSD_x_xm 0x58
-#define ADC (/* BINARY */ 2 << 3)
-#define ADC_EAX_i32 0x15
-#define ADC_r_rm 0x13
-#define ADC_rm_r 0x11
-#define AND (/* BINARY */ 4 << 3)
-#define AND_EAX_i32 0x25
-#define AND_r_rm 0x23
-#define AND_rm_r 0x21
-#define ANDPD_x_xm 0x54
-#define BSR_r_rm (/* GROUP_0F */ 0xbd)
-#define BSF_r_rm (/* GROUP_0F */ 0xbc)
-#define CALL_i32 0xe8
-#define CALL_rm (/* GROUP_FF */ 2 << 3)
-#define CDQ 0x99
-#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
-#define CMP (/* BINARY */ 7 << 3)
-#define CMP_EAX_i32 0x3d
-#define CMP_r_rm 0x3b
-#define CMP_rm_r 0x39
-#define CVTPD2PS_x_xm 0x5a
-#define CVTSI2SD_x_rm 0x2a
-#define CVTTSD2SI_r_xm 0x2c
-#define DIV (/* GROUP_F7 */ 6 << 3)
-#define DIVSD_x_xm 0x5e
-#define FLDS 0xd9
-#define FLDL 0xdd
-#define FSTPS 0xd9
-#define FSTPD 0xdd
-#define INT3 0xcc
-#define IDIV (/* GROUP_F7 */ 7 << 3)
-#define IMUL (/* GROUP_F7 */ 5 << 3)
-#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
-#define IMUL_r_rm_i8 0x6b
-#define IMUL_r_rm_i32 0x69
-#define JE_i8 0x74
-#define JNE_i8 0x75
-#define JMP_i8 0xeb
-#define JMP_i32 0xe9
-#define JMP_rm (/* GROUP_FF */ 4 << 3)
-#define LEA_r_m 0x8d
-#define LOOP_i8 0xe2
-#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
-#define MOV_r_rm 0x8b
-#define MOV_r_i32 0xb8
-#define MOV_rm_r 0x89
-#define MOV_rm_i32 0xc7
-#define MOV_rm8_i8 0xc6
-#define MOV_rm8_r8 0x88
-#define MOVAPS_x_xm 0x28
-#define MOVAPS_xm_x 0x29
-#define MOVSD_x_xm 0x10
-#define MOVSD_xm_x 0x11
-#define MOVSXD_r_rm 0x63
-#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
-#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
-#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
-#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
-#define MUL (/* GROUP_F7 */ 4 << 3)
-#define MULSD_x_xm 0x59
-#define NEG_rm (/* GROUP_F7 */ 3 << 3)
-#define NOP 0x90
-#define NOT_rm (/* GROUP_F7 */ 2 << 3)
-#define OR (/* BINARY */ 1 << 3)
-#define OR_r_rm 0x0b
-#define OR_EAX_i32 0x0d
-#define OR_rm_r 0x09
-#define OR_rm8_r8 0x08
-#define POP_r 0x58
-#define POP_rm 0x8f
-#define POPF 0x9d
-#define PREFETCH 0x18
-#define PUSH_i32 0x68
-#define PUSH_r 0x50
-#define PUSH_rm (/* GROUP_FF */ 6 << 3)
-#define PUSHF 0x9c
-#define ROL (/* SHIFT */ 0 << 3)
-#define ROR (/* SHIFT */ 1 << 3)
-#define RET_near 0xc3
-#define RET_i16 0xc2
-#define SBB (/* BINARY */ 3 << 3)
-#define SBB_EAX_i32 0x1d
-#define SBB_r_rm 0x1b
-#define SBB_rm_r 0x19
-#define SAR (/* SHIFT */ 7 << 3)
-#define SHL (/* SHIFT */ 4 << 3)
-#define SHLD (/* GROUP_0F */ 0xa5)
-#define SHRD (/* GROUP_0F */ 0xad)
-#define SHR (/* SHIFT */ 5 << 3)
-#define SUB (/* BINARY */ 5 << 3)
-#define SUB_EAX_i32 0x2d
-#define SUB_r_rm 0x2b
-#define SUB_rm_r 0x29
-#define SUBSD_x_xm 0x5c
-#define TEST_EAX_i32 0xa9
-#define TEST_rm_r 0x85
-#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
-#define UCOMISD_x_xm 0x2e
-#define UNPCKLPD_x_xm 0x14
-#define XCHG_EAX_r 0x90
-#define XCHG_r_rm 0x87
-#define XOR (/* BINARY */ 6 << 3)
-#define XOR_EAX_i32 0x35
-#define XOR_r_rm 0x33
-#define XOR_rm_r 0x31
-#define XORPD_x_xm 0x57
-
-#define GROUP_0F 0x0f
-#define GROUP_F3 0xf3
-#define GROUP_F7 0xf7
-#define GROUP_FF 0xff
-#define GROUP_BINARY_81 0x81
-#define GROUP_BINARY_83 0x83
-#define GROUP_SHIFT_1 0xd1
-#define GROUP_SHIFT_N 0xc1
-#define GROUP_SHIFT_CL 0xd3
-
-#define MOD_REG 0xc0
-#define MOD_DISP8 0x40
-
-#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
-
-#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
-#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
-#define RET() (*inst++ = RET_near)
-#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
+#define ADD (/* BINARY */ 0 << 3)
+#define ADD_EAX_i32 0x05
+#define ADD_r_rm 0x03
+#define ADD_rm_r 0x01
+#define ADDSD_x_xm 0x58
+#define ADC (/* BINARY */ 2 << 3)
+#define ADC_EAX_i32 0x15
+#define ADC_r_rm 0x13
+#define ADC_rm_r 0x11
+#define AND (/* BINARY */ 4 << 3)
+#define AND_EAX_i32 0x25
+#define AND_r_rm 0x23
+#define AND_rm_r 0x21
+#define ANDPD_x_xm 0x54
+#define BSR_r_rm (/* GROUP_0F */ 0xbd)
+#define BSF_r_rm (/* GROUP_0F */ 0xbc)
+#define BSWAP_r (/* GROUP_0F */ 0xc8)
+#define CALL_i32 0xe8
+#define CALL_rm (/* GROUP_FF */ 2 << 3)
+#define CDQ 0x99
+#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
+#define CMP (/* BINARY */ 7 << 3)
+#define CMP_EAX_i32 0x3d
+#define CMP_r_rm 0x3b
+#define CMP_rm_r 0x39
+#define CMPS_x_xm 0xc2
+#define CMPXCHG_rm_r 0xb1
+#define CMPXCHG_rm8_r 0xb0
+#define CVTPD2PS_x_xm 0x5a
+#define CVTPS2PD_x_xm 0x5a
+#define CVTSI2SD_x_rm 0x2a
+#define CVTTSD2SI_r_xm 0x2c
+#define DIV (/* GROUP_F7 */ 6 << 3)
+#define DIVSD_x_xm 0x5e
+#define EXTRACTPS_x_xm 0x17
+#define FLDS 0xd9
+#define FLDL 0xdd
+#define FSTPS 0xd9
+#define FSTPD 0xdd
+#define INSERTPS_x_xm 0x21
+#define INT3 0xcc
+#define IDIV (/* GROUP_F7 */ 7 << 3)
+#define IMUL (/* GROUP_F7 */ 5 << 3)
+#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
+#define IMUL_r_rm_i8 0x6b
+#define IMUL_r_rm_i32 0x69
+#define JL_i8 0x7c
+#define JE_i8 0x74
+#define JNC_i8 0x73
+#define JNE_i8 0x75
+#define JMP_i8 0xeb
+#define JMP_i32 0xe9
+#define JMP_rm (/* GROUP_FF */ 4 << 3)
+#define LEA_r_m 0x8d
+#define LOOP_i8 0xe2
+#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
+#define MOV_r_rm 0x8b
+#define MOV_r_i32 0xb8
+#define MOV_rm_r 0x89
+#define MOV_rm_i32 0xc7
+#define MOV_rm8_i8 0xc6
+#define MOV_rm8_r8 0x88
+#define MOVAPS_x_xm 0x28
+#define MOVAPS_xm_x 0x29
+#define MOVD_x_rm 0x6e
+#define MOVD_rm_x 0x7e
+#define MOVDDUP_x_xm 0x12
+#define MOVDQA_x_xm 0x6f
+#define MOVDQA_xm_x 0x7f
+#define MOVHLPS_x_x 0x12
+#define MOVHPD_m_x 0x17
+#define MOVHPD_x_m 0x16
+#define MOVLHPS_x_x 0x16
+#define MOVLPD_m_x 0x13
+#define MOVLPD_x_m 0x12
+#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)
+#define MOVQ_x_xm (/* GROUP_0F */ 0x7e)
+#define MOVSD_x_xm 0x10
+#define MOVSD_xm_x 0x11
+#define MOVSHDUP_x_xm 0x16
+#define MOVSXD_r_rm 0x63
+#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
+#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
+#define MOVUPS_x_xm 0x10
+#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
+#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
+#define MUL (/* GROUP_F7 */ 4 << 3)
+#define MULSD_x_xm 0x59
+#define NEG_rm (/* GROUP_F7 */ 3 << 3)
+#define NOP 0x90
+#define NOT_rm (/* GROUP_F7 */ 2 << 3)
+#define OR (/* BINARY */ 1 << 3)
+#define OR_r_rm 0x0b
+#define OR_EAX_i32 0x0d
+#define OR_rm_r 0x09
+#define OR_rm8_r8 0x08
+#define ORPD_x_xm 0x56
+#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)
+#define PAND_x_xm 0xdb
+#define PCMPEQD_x_xm 0x76
+#define PINSRB_x_rm_i8 0x20
+#define PINSRW_x_rm_i8 0xc4
+#define PINSRD_x_rm_i8 0x22
+#define PEXTRB_rm_x_i8 0x14
+#define PEXTRW_rm_x_i8 0x15
+#define PEXTRD_rm_x_i8 0x16
+#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)
+#define PMOVSXBD_x_xm 0x21
+#define PMOVSXBQ_x_xm 0x22
+#define PMOVSXBW_x_xm 0x20
+#define PMOVSXDQ_x_xm 0x25
+#define PMOVSXWD_x_xm 0x23
+#define PMOVSXWQ_x_xm 0x24
+#define PMOVZXBD_x_xm 0x31
+#define PMOVZXBQ_x_xm 0x32
+#define PMOVZXBW_x_xm 0x30
+#define PMOVZXDQ_x_xm 0x35
+#define PMOVZXWD_x_xm 0x33
+#define PMOVZXWQ_x_xm 0x34
+#define POP_r 0x58
+#define POP_rm 0x8f
+#define POPF 0x9d
+#define POR_x_xm 0xeb
+#define PREFETCH 0x18
+#define PSHUFB_x_xm 0x00
+#define PSHUFD_x_xm 0x70
+#define PSHUFLW_x_xm 0x70
+#define PSRLDQ_x 0x73
+#define PSLLD_x_i8 0x72
+#define PSLLQ_x_i8 0x73
+#define PUSH_i32 0x68
+#define PUSH_r 0x50
+#define PUSH_rm (/* GROUP_FF */ 6 << 3)
+#define PUSHF 0x9c
+#define PXOR_x_xm 0xef
+#define ROL (/* SHIFT */ 0 << 3)
+#define ROR (/* SHIFT */ 1 << 3)
+#define RET_near 0xc3
+#define RET_i16 0xc2
+#define SBB (/* BINARY */ 3 << 3)
+#define SBB_EAX_i32 0x1d
+#define SBB_r_rm 0x1b
+#define SBB_rm_r 0x19
+#define SAR (/* SHIFT */ 7 << 3)
+#define SHL (/* SHIFT */ 4 << 3)
+#define SHLD (/* GROUP_0F */ 0xa5)
+#define SHRD (/* GROUP_0F */ 0xad)
+#define SHR (/* SHIFT */ 5 << 3)
+#define SHUFPS_x_xm 0xc6
+#define SUB (/* BINARY */ 5 << 3)
+#define SUB_EAX_i32 0x2d
+#define SUB_r_rm 0x2b
+#define SUB_rm_r 0x29
+#define SUBSD_x_xm 0x5c
+#define TEST_EAX_i32 0xa9
+#define TEST_rm_r 0x85
+#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
+#define UCOMISD_x_xm 0x2e
+#define UNPCKLPD_x_xm 0x14
+#define UNPCKLPS_x_xm 0x14
+#define VBROADCASTSD_x_xm 0x19
+#define VBROADCASTSS_x_xm 0x18
+#define VEXTRACTF128_x_ym 0x19
+#define VEXTRACTI128_x_ym 0x39
+#define VINSERTF128_y_y_xm 0x18
+#define VINSERTI128_y_y_xm 0x38
+#define VPBROADCASTB_x_xm 0x78
+#define VPBROADCASTD_x_xm 0x58
+#define VPBROADCASTQ_x_xm 0x59
+#define VPBROADCASTW_x_xm 0x79
+#define VPERMPD_y_ym 0x01
+#define VPERMQ_y_ym 0x00
+#define XCHG_EAX_r 0x90
+#define XCHG_r_rm 0x87
+#define XOR (/* BINARY */ 6 << 3)
+#define XOR_EAX_i32 0x35
+#define XOR_r_rm 0x33
+#define XOR_rm_r 0x31
+#define XORPD_x_xm 0x57
+
+#define GROUP_0F 0x0f
+#define GROUP_66 0x66
+#define GROUP_F3 0xf3
+#define GROUP_F7 0xf7
+#define GROUP_FF 0xff
+#define GROUP_BINARY_81 0x81
+#define GROUP_BINARY_83 0x83
+#define GROUP_SHIFT_1 0xd1
+#define GROUP_SHIFT_N 0xc1
+#define GROUP_SHIFT_CL 0xd3
+#define GROUP_LOCK 0xf0
+
+#define MOD_REG 0xc0
+#define MOD_DISP8 0x40
+
+#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
+
+#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
+#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
+#define RET() (*inst++ = RET_near)
+#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
/* Multithreading does not affect these static variables, since they store
built-in CPU features. Therefore they can be overwritten by different threads
@@ -297,9 +386,12 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2 0x002
#endif
-#define CPU_FEATURE_LZCNT 0x004
-#define CPU_FEATURE_TZCNT 0x008
-#define CPU_FEATURE_CMOV 0x010
+#define CPU_FEATURE_SSE41 0x004 /* Set by get_cpu_features from CPUID leaf 1, info[2] mask 0x80000. */
+#define CPU_FEATURE_LZCNT 0x008 /* Set by get_cpu_features from CPUID leaf 0x80000001, info[2] mask 0x20. */
+#define CPU_FEATURE_TZCNT 0x010 /* Set by get_cpu_features from CPUID leaf 7, info[1] mask 0x8. */
+#define CPU_FEATURE_CMOV 0x020 /* Set by get_cpu_features from CPUID leaf 1, info[3] mask 0x8000. */
+#define CPU_FEATURE_AVX 0x040 /* Set by get_cpu_features from CPUID leaf 1, info[2] mask 0x10000000. */
+#define CPU_FEATURE_AVX2 0x080 /* Set by get_cpu_features from CPUID leaf 7, info[1] mask 0x20. */
static sljit_u32 cpu_feature_list = 0;
@@ -332,124 +424,124 @@ static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
/* Utility functions */
/******************************************************/
-static void get_cpu_features(void)
+static void execute_cpu_id(sljit_u32 info[4]) /* Runs CPUID with the leaf read from info[0] and the sub-leaf read from info[2], then overwrites info[0..3] with the EAX, EBX, ECX and EDX results. */
{
- sljit_u32 feature_list = CPU_FEATURE_DETECTED;
- sljit_u32 value;
-
#if defined(_MSC_VER) && _MSC_VER >= 1400
- int CPUInfo[4];
-
- __cpuid(CPUInfo, 0);
- if (CPUInfo[0] >= 7) {
- __cpuidex(CPUInfo, 7, 0);
- if (CPUInfo[1] & 0x8)
- feature_list |= CPU_FEATURE_TZCNT;
- }
-
- __cpuid(CPUInfo, (int)0x80000001);
- if (CPUInfo[2] & 0x20)
- feature_list |= CPU_FEATURE_LZCNT;
-
- __cpuid(CPUInfo, 1);
- value = (sljit_u32)CPUInfo[3];
+ __cpuidex((int*)info, (int)info[0], (int)info[2]); /* Intrinsic path: info is both the source of the leaf and sub-leaf and the output buffer. */
-#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)
/* AT&T syntax. */
__asm__ (
- "movl $0x0, %%eax\n"
- "lzcnt %%eax, %%eax\n"
- "setnz %%al\n"
- "movl %%eax, %0\n"
- : "=g" (value)
- :
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "eax"
-#else
- : "rax"
-#endif
- );
-
- if (value & 0x1)
- feature_list |= CPU_FEATURE_LZCNT;
-
- __asm__ (
- "movl $0x0, %%eax\n"
- "tzcnt %%eax, %%eax\n"
- "setnz %%al\n"
- "movl %%eax, %0\n"
- : "=g" (value)
- :
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "eax"
-#else
- : "rax"
-#endif
- );
-
- if (value & 0x1)
- feature_list |= CPU_FEATURE_TZCNT;
-
- __asm__ (
- "movl $0x1, %%eax\n"
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- /* On x86-32, there is no red zone, so this
- should work (no need for a local variable). */
- "push %%ebx\n"
-#endif
+ "movl %0, %%esi\n"
+ "movl (%%esi), %%eax\n"
+ "movl 8(%%esi), %%ecx\n"
+ "pushl %%ebx\n"
"cpuid\n"
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- "pop %%ebx\n"
-#endif
- "movl %%edx, %0\n"
- : "=g" (value)
+ "movl %%eax, (%%esi)\n"
+ "movl %%ebx, 4(%%esi)\n"
+ "popl %%ebx\n"
+ "movl %%ecx, 8(%%esi)\n"
+ "movl %%edx, 12(%%esi)\n"
+#else /* !SLJIT_CONFIG_X86_32 */
+ "movq %0, %%rsi\n"
+ "movl (%%rsi), %%eax\n"
+ "movl 8(%%rsi), %%ecx\n"
+ "cpuid\n"
+ "movl %%eax, (%%rsi)\n"
+ "movl %%ebx, 4(%%rsi)\n"
+ "movl %%ecx, 8(%%rsi)\n"
+ "movl %%edx, 12(%%rsi)\n"
+#endif /* SLJIT_CONFIG_X86_32 */
:
+ : "r" (info) /* Only operand: the pointer to the four-word buffer; results are written through it, which the memory clobber below accounts for. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "%eax", "%ecx", "%edx"
-#else
- : "%rax", "%rbx", "%rcx", "%rdx"
-#endif
+ : "memory", "eax", "ecx", "edx", "esi" /* ebx is not listed here: the 32 bit sequence saves and restores it with pushl/popl instead. */
+#else /* !SLJIT_CONFIG_X86_32 */
+ : "memory", "rax", "rbx", "rcx", "rdx", "rsi"
+#endif /* SLJIT_CONFIG_X86_32 */
);
-#else /* _MSC_VER && _MSC_VER >= 1400 */
+#else /* _MSC_VER < 1400 */
/* Intel syntax. */
__asm {
- mov eax, 0
- lzcnt eax, eax
- setnz al
- mov value, eax
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ mov esi, info
+ mov eax, [esi]
+ mov ecx, [esi + 8]
+ cpuid
+ mov [esi], eax
+ mov [esi + 4], ebx
+ mov [esi + 8], ecx
+ mov [esi + 12], edx
+#else /* !SLJIT_CONFIG_X86_32 */
+ mov rsi, info
+ mov eax, [rsi]
+ mov ecx, [rsi + 8]
+ cpuid
+ mov [rsi], eax
+ mov [rsi + 4], ebx
+ mov [rsi + 8], ecx
+ mov [rsi + 12], edx
+#endif /* SLJIT_CONFIG_X86_32 */
}
- if (value & 0x1)
- feature_list |= CPU_FEATURE_LZCNT;
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
- __asm {
- mov eax, 0
- tzcnt eax, eax
- setnz al
- mov value, eax
- }
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+__msan_unpoison(info, 4 * sizeof(sljit_u32)); /* Marks the four result words as initialized for MemorySanitizer; presumably needed because the sanitizer cannot see the stores done by the asm -- confirm. */
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
- if (value & 0x1)
- feature_list |= CPU_FEATURE_TZCNT;
+}
- __asm {
- mov eax, 1
- cpuid
- mov value, edx
+static void get_cpu_features(void)
+{ /* Queries CPUID through execute_cpu_id and stores the detected feature bits in cpu_feature_list. */
+ sljit_u32 feature_list = CPU_FEATURE_DETECTED; /* Always set, so the stored list is non-zero once detection has run. */
+ sljit_u32 info[4];
+ sljit_u32 max_id;
+
+ info[0] = 0; /* Leaf 0: the returned info[0] is used as the highest basic leaf supported. */
+ execute_cpu_id(info);
+ max_id = info[0];
+
+ if (max_id >= 7) {
+ info[0] = 7;
+ info[2] = 0; /* Sub-leaf 0. */
+ execute_cpu_id(info);
+
+ if (info[1] & 0x8) /* EBX bit 3. */
+ feature_list |= CPU_FEATURE_TZCNT;
+ if (info[1] & 0x20) /* EBX bit 5. */
+ feature_list |= CPU_FEATURE_AVX2;
}
-#endif /* _MSC_VER && _MSC_VER >= 1400 */
+ if (max_id >= 1) {
+ info[0] = 1; /* info[2] is not reset here and still holds the ECX value of the previous query. */
+ execute_cpu_id(info);
+ if (info[2] & 0x80000) /* ECX bit 19. */
+ feature_list |= CPU_FEATURE_SSE41;
+ if (info[2] & 0x10000000) /* ECX bit 28. NOTE(review): AVX and AVX2 are reported from CPUID bits alone; no operating system support check is visible here -- confirm this is sufficient. */
+ feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
- if (value & 0x4000000)
- feature_list |= CPU_FEATURE_SSE2;
+ if (info[3] & 0x4000000) /* EDX bit 26. */
+ feature_list |= CPU_FEATURE_SSE2;
#endif
- if (value & 0x8000)
- feature_list |= CPU_FEATURE_CMOV;
+ if (info[3] & 0x8000) /* EDX bit 15. */
+ feature_list |= CPU_FEATURE_CMOV;
+ }
+
+ info[0] = 0x80000001; /* NOTE(review): this extended leaf is queried without first checking the maximum extended leaf (0x80000000) -- confirm every supported CPU implements it. */
+ info[2] = 0; /* Silences an incorrect compiler warning. */
+ execute_cpu_id(info);
+
+ if (info[2] & 0x20) /* ECX bit 5. */
+ feature_list |= CPU_FEATURE_LZCNT;
cpu_feature_list = feature_list;
}
@@ -458,15 +550,15 @@ static sljit_u8 get_jump_code(sljit_uw type)
{
switch (type) {
case SLJIT_EQUAL:
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL:
- case SLJIT_ORDERED_EQUAL: /* Not supported. */
return 0x84 /* je */;
case SLJIT_NOT_EQUAL:
+ case SLJIT_ATOMIC_NOT_STORED:
case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
- case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
return 0x85 /* jne */;
case SLJIT_LESS:
@@ -514,9 +606,11 @@ static sljit_u8 get_jump_code(sljit_uw type)
return 0x81 /* jno */;
case SLJIT_UNORDERED:
+ case SLJIT_ORDERED_EQUAL: /* NaN. */
return 0x8a /* jp */;
case SLJIT_ORDERED:
+ case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
return 0x8b /* jpo */;
}
return 0;
@@ -541,7 +635,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
label_addr = jump->u.target - (sljit_uw)executable_offset;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
+ if ((sljit_sw)(label_addr - (jump->addr + 2)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 6)) < HALFWORD_MIN)
return generate_far_jump_code(jump, code_ptr);
#endif
@@ -737,7 +831,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_feature_list == 0)
get_cpu_features();
@@ -768,19 +862,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
- case SLJIT_HAS_SSE2:
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
+ case SLJIT_HAS_AVX:
if (cpu_feature_list == 0)
get_cpu_features();
- return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
-#else /* !SLJIT_DETECT_SSE2 */
- return 1;
-#endif /* SLJIT_DETECT_SSE2 */
-
+ return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
+ case SLJIT_HAS_AVX2:
+ if (cpu_feature_list == 0)
+ get_cpu_features();
+ return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
+ case SLJIT_HAS_SIMD:
+ if (cpu_feature_list == 0)
+ get_cpu_features();
+ return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
default:
return 0;
}
@@ -788,16 +891,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
- return 0;
-
switch (type) {
case SLJIT_ORDERED_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
- return 0;
+ return 2; /* Only these two comparison types report 2; the meaning of the value is defined by the public header (presumably an instruction count -- confirm). */
}
- return 1;
+ return 0; /* Every other type reports 0, including values outside the range the removed check used to reject. */
}
/* --------------------------------------------------------------------- */
@@ -841,6 +941,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
#endif /* SLJIT_CONFIG_X86_64 */
+static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
+{ /* Appends a single byte to the instruction buffer; returns SLJIT_SUCCESS, or the failure produced by FAIL_IF when ensure_buf returns NULL. */
+ sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); /* One extra byte is reserved for the length prefix written by INC_SIZE. */
+ FAIL_IF(!inst);
+ INC_SIZE(1); /* Stores the length, advances inst and adds 1 to compiler->size. */
+ *inst = byte;
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw);
@@ -848,6 +957,14 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
+ sljit_uw op,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
+
+static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
+ sljit_uw op,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
+
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
@@ -858,6 +975,10 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
+static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw);
+
static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
@@ -866,14 +987,14 @@ static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
- *inst++ = 0xf3;
- *inst++ = 0x0f;
- *inst++ = 0x1e;
+ inst[0] = GROUP_F3;
+ inst[1] = GROUP_0F;
+ inst[2] = 0x1e;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- *inst = 0xfb;
-#else
- *inst = 0xfa;
-#endif
+ inst[3] = 0xfb;
+#else /* !SLJIT_CONFIG_X86_32 */
+ inst[3] = 0xfa;
+#endif /* SLJIT_CONFIG_X86_32 */
#else /* !SLJIT_CONFIG_X86_CET */
SLJIT_UNUSED_ARG(compiler);
#endif /* SLJIT_CONFIG_X86_CET */
@@ -896,13 +1017,17 @@ static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
- *inst++ = 0xf3;
+ *inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
- *inst++ = 0x0f;
- *inst++ = 0x1e;
- *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
+ inst[0] = GROUP_0F;
+ inst[1] = 0x1e;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
+#else
+ inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
+#endif
return SLJIT_SUCCESS;
}
@@ -920,13 +1045,13 @@ static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
- *inst++ = 0xf3;
+ *inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
- *inst++ = 0x0f;
- *inst++ = 0xae;
- *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
+ inst[0] = GROUP_0F;
+ inst[1] = 0xae;
+ inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
return SLJIT_SUCCESS;
}
@@ -954,19 +1079,7 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile
FAIL_IF(emit_rdssp(compiler, TMP_REG1));
/* Load return address on shadow stack into TMP_REG1. */
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
-
- /* Hand code unsupported "mov 0x0(%ebp),%ebp". */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
- FAIL_IF(!inst);
- INC_SIZE(3);
- *inst++ = 0x8b;
- *inst++ = 0x6d;
- *inst = 0;
-#else /* !SLJIT_CONFIG_X86_32 */
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
-#endif /* SLJIT_CONFIG_X86_32 */
/* Compare return address against TMP_REG1. */
FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
@@ -994,8 +1107,8 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
- *inst++ = JMP_i8;
- *inst = size_before_rdssp_inst - compiler->size;
+ inst[0] = JMP_i8;
+ inst[1] = size_before_rdssp_inst - compiler->size;
*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
@@ -1024,7 +1137,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
*inst = MOV_rm_r;
return SLJIT_SUCCESS;
}
- if (src & SLJIT_IMM) {
+
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1071,6 +1185,27 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_u8* inst;
+ sljit_uw size;
+
+ SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);
+
+ size = compiler->size;
+ EMIT_MOV(compiler, dst_reg, 0, src, srcw);
+
+ inst[1] = U8(compiler->size - size);
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
sljit_u8 *inst;
@@ -1083,17 +1218,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_BREAKPOINT:
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = INT3;
- break;
+ return emit_byte(compiler, INT3);
case SLJIT_NOP:
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = NOP;
- break;
+ return emit_byte(compiler, NOP);
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
case SLJIT_DIVMOD_UW:
@@ -1134,23 +1261,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = CDQ;
+ FAIL_IF(emit_byte(compiler, CDQ));
#else
- if (compiler->mode32) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = CDQ;
- } else {
+ if (!compiler->mode32) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
- *inst++ = REX_W;
- *inst = CDQ;
- }
+ inst[0] = REX_W;
+ inst[1] = CDQ;
+ } else
+ FAIL_IF(emit_byte(compiler, CDQ));
#endif
}
@@ -1158,14 +1278,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
- *inst++ = GROUP_F7;
- *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
-#else
+ inst[0] = GROUP_F7;
+ inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
+#else /* !SLJIT_CONFIG_X86_32 */
#ifdef _WIN64
size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
-#else
+#else /* !_WIN64 */
size = (!compiler->mode32) ? 3 : 2;
-#endif
+#endif /* _WIN64 */
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
@@ -1174,29 +1294,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
else if (op >= SLJIT_DIVMOD_UW)
*inst++ = REX_B;
- *inst++ = GROUP_F7;
- *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
-#else
+ inst[0] = GROUP_F7;
+ inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
+#else /* !_WIN64 */
if (!compiler->mode32)
*inst++ = REX_W;
- *inst++ = GROUP_F7;
- *inst = MOD_REG | reg_map[SLJIT_R1];
-#endif
-#endif
+ inst[0] = GROUP_F7;
+ inst[1] = MOD_REG | reg_map[SLJIT_R1];
+#endif /* _WIN64 */
+#endif /* SLJIT_CONFIG_X86_32 */
switch (op) {
case SLJIT_LMUL_UW:
- *inst |= MUL;
+ inst[1] |= MUL;
break;
case SLJIT_LMUL_SW:
- *inst |= IMUL;
+ inst[1] |= IMUL;
break;
case SLJIT_DIVMOD_UW:
case SLJIT_DIV_UW:
- *inst |= DIV;
+ inst[1] |= DIV;
break;
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_SW:
- *inst |= IDIV;
+ inst[1] |= IDIV;
break;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
@@ -1216,29 +1336,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
-#define ENCODE_PREFIX(prefix) \
- do { \
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
- FAIL_IF(!inst); \
- INC_SIZE(1); \
- *inst = U8(prefix); \
- } while (0)
-
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_u8* inst;
sljit_s32 dst_r;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- sljit_s32 work_r;
-#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1267,100 +1376,33 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
#else
dst_r = src;
#endif
- }
+ } else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
- /* src, dst are registers. */
- SLJIT_ASSERT(FAST_IS_REG(dst));
- if (reg_map[dst] < 4) {
- if (dst != src)
- EMIT_MOV(compiler, dst, 0, src, 0);
- inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
- }
- else {
- if (dst != src)
- EMIT_MOV(compiler, dst, 0, src, 0);
- if (sign) {
- /* shl reg, 24 */
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
- FAIL_IF(!inst);
- *inst |= SHL;
- /* sar reg, 24 */
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
- FAIL_IF(!inst);
- *inst |= SAR;
- }
- else {
+ if (FAST_IS_REG(src) && reg_map[src] >= 4) {
+ /* Both src and dst are registers. */
+ SLJIT_ASSERT(FAST_IS_REG(dst));
+
+ if (src == dst && !sign) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
FAIL_IF(!inst);
*(inst + 1) |= AND;
+ return SLJIT_SUCCESS;
}
+
+ EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+ src = TMP_REG1;
+ srcw = 0;
}
- return SLJIT_SUCCESS;
- }
-#endif
- else {
+#endif /* SLJIT_CONFIG_X86_32 */
+
/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+ FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
}
if (dst & SLJIT_MEM) {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (dst_r == TMP_REG1) {
- /* Find a non-used register, whose reg_map[src] < 4. */
- if ((dst & REG_MASK) == SLJIT_R0) {
- if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
- work_r = SLJIT_R2;
- else
- work_r = SLJIT_R1;
- }
- else {
- if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
- work_r = SLJIT_R0;
- else if ((dst & REG_MASK) == SLJIT_R1)
- work_r = SLJIT_R2;
- else
- work_r = SLJIT_R1;
- }
-
- if (work_r == SLJIT_R0) {
- ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
- }
- else {
- inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
- FAIL_IF(!inst);
- *inst = XCHG_r_rm;
- }
-
- inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
- FAIL_IF(!inst);
- *inst = MOV_rm8_r8;
-
- if (work_r == SLJIT_R0) {
- ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
- }
- else {
- inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
- FAIL_IF(!inst);
- *inst = XCHG_r_rm;
- }
- }
- else {
- inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
- FAIL_IF(!inst);
- *inst = MOV_rm8_r8;
- }
-#else
inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm8_r8;
-#endif
}
return SLJIT_SUCCESS;
@@ -1377,15 +1419,15 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst++ = PREFETCH;
+ inst[0] = GROUP_0F;
+ inst[1] = PREFETCH;
if (op == SLJIT_PREFETCH_L1)
- *inst |= (1 << 3);
+ inst[2] |= (1 << 3);
else if (op == SLJIT_PREFETCH_L2)
- *inst |= (2 << 3);
+ inst[2] |= (2 << 3);
else if (op == SLJIT_PREFETCH_L3)
- *inst |= (3 << 3);
+ inst[2] |= (3 << 3);
return SLJIT_SUCCESS;
}
@@ -1401,7 +1443,7 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1422,12 +1464,8 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
dst_r = src;
- else {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
- }
+ else
+ FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));
if (dst & SLJIT_MEM) {
inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
@@ -1448,8 +1486,8 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
/* Same input and output */
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= opcode;
+ inst[0] = GROUP_F7;
+ inst[1] |= opcode;
return SLJIT_SUCCESS;
}
@@ -1457,46 +1495,16 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
EMIT_MOV(compiler, dst, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= opcode;
+ inst[0] = GROUP_F7;
+ inst[1] |= opcode;
return SLJIT_SUCCESS;
}
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= opcode;
- EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
- return SLJIT_SUCCESS;
-}
-
-static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
-{
- sljit_u8* inst;
-
- if (FAST_IS_REG(dst)) {
- EMIT_MOV(compiler, dst, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= NOT_rm;
- inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
- FAIL_IF(!inst);
- *inst = OR_r_rm;
- return SLJIT_SUCCESS;
- }
-
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= NOT_rm;
- inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst = OR_r_rm;
+ inst[0] = GROUP_F7;
+ inst[1] |= opcode;
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -1514,32 +1522,19 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
sljit_s32 dst_r;
sljit_sw max;
- if (cpu_feature_list == 0)
- get_cpu_features();
+ SLJIT_ASSERT(cpu_feature_list != 0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
- /* Group prefix added separately. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst++ = GROUP_F3;
-
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm;
+ FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));
if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = is_clz ? BSR_r_rm : BSF_r_rm;
+ FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
max = is_clz ? (32 + 31) : 32;
@@ -1553,11 +1548,11 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = CMOVE_r_rm;
+ inst[0] = GROUP_0F;
+ inst[1] = CMOVE_r_rm;
}
else
- FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
+ FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
if (is_clz) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
@@ -1572,14 +1567,9 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
if (cpu_feature_list & CPU_FEATURE_CMOV) {
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
-
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = CMOVE_r_rm;
- }
- else
- FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
+ FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
+ } else
+ FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
if (is_clz) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
@@ -1593,14 +1583,109 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
+ sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_u8 *inst;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_uw size;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ sljit_u8 rex = 0;
+#else /* !SLJIT_CONFIG_X86_64 */
+ sljit_s32 dst_is_ereg = op & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
+ compiler->mode32 = 1;
+#else /* !SLJIT_CONFIG_X86_64 */
+ op &= ~SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (src != dst_r) {
+ /* Only the lower 16 bits are read for eregs. */
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
+ FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
+ else
+ EMIT_MOV(compiler, dst_r, 0, src, srcw);
+ }
+
+ size = 2;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (!compiler->mode32)
+ rex = REX_W;
+
+ if (reg_map[dst_r] >= 8)
+ rex |= REX_B;
+
+ if (rex != 0)
+ size++;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!inst);
+ INC_SIZE(size);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (rex != 0)
+ *inst++ = rex;
+
+ inst[0] = GROUP_0F;
+ inst[1] = BSWAP_r | reg_lmap[dst_r];
+#else /* !SLJIT_CONFIG_X86_64 */
+ inst[0] = GROUP_0F;
+ inst[1] = BSWAP_r | reg_map[dst_r];
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ size = compiler->mode32 ? 16 : 48;
+#else /* !SLJIT_CONFIG_X86_64 */
+ size = 16;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
+ FAIL_IF(!inst);
+ if (op == SLJIT_REV_U16)
+ inst[1] |= SHR;
+ else
+ inst[1] |= SAR;
+ }
+
+ if (dst & SLJIT_MEM) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (dst_is_ereg)
+ op = SLJIT_REV;
+#endif /* SLJIT_CONFIG_X86_32 */
+ if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
+ return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
+
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (op == SLJIT_REV_S32) {
+ compiler->mode32 = 0;
+ inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
+ FAIL_IF(!inst);
+ *inst = MOVSXD_r_rm;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 dst_is_ereg = 0;
-#endif
+#else /* !SLJIT_CONFIG_X86_32 */
+ sljit_s32 op_flags = GET_ALL_FLAGS(op);
+#endif /* SLJIT_CONFIG_X86_32 */
CHECK_ERROR();
CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
@@ -1611,14 +1696,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op_flags & SLJIT_32;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
op = GET_OPCODE(op);
if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
if (FAST_IS_REG(src) && src == dst) {
if (!TYPE_CAST_NEEDED(op))
@@ -1631,14 +1716,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
}
}
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
switch (op) {
case SLJIT_MOV_U8:
srcw = (sljit_u8)srcw;
@@ -1659,12 +1744,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_S32:
srcw = (sljit_s32)srcw;
break;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (SLJIT_UNLIKELY(dst_is_ereg))
return emit_mov(compiler, dst, dstw, src, srcw);
-#endif
+#endif /* SLJIT_CONFIG_X86_32 */
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1672,7 +1757,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
dst = TMP_REG1;
}
-#endif
+#endif /* SLJIT_CONFIG_X86_32 */
switch (op) {
case SLJIT_MOV:
@@ -1681,7 +1766,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV32:
-#endif
+#endif /* SLJIT_CONFIG_X86_32 */
EMIT_MOV(compiler, dst, dstw, src, srcw);
break;
case SLJIT_MOV_U8:
@@ -1708,25 +1793,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
EMIT_MOV(compiler, dst, dstw, src, srcw);
compiler->mode32 = 0;
break;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
-#endif
+#endif /* SLJIT_CONFIG_X86_32 */
return SLJIT_SUCCESS;
}
switch (op) {
- case SLJIT_NOT:
- if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
- return emit_not_with_flags(compiler, dst, dstw, src, srcw);
- return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
-
case SLJIT_CLZ:
case SLJIT_CTZ:
return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (dst_is_ereg)
+ op |= SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_32 */
+ return emit_bswap(compiler, op, dst, dstw, src, srcw);
}
return SLJIT_SUCCESS;
@@ -1745,7 +1835,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
sljit_u8 op_imm = U8(op_types & 0xff);
if (dst == src1 && dstw == src1w) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
@@ -1779,7 +1869,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
/* Only for cumulative operations. */
if (dst == src2 && dstw == src2w) {
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
@@ -1813,7 +1903,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
/* General version. */
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
}
else {
@@ -1825,7 +1915,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
else {
/* This version requires less memory writing. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
}
else {
@@ -1852,7 +1942,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
sljit_u8 op_imm = U8(op_types & 0xff);
if (dst == src1 && dstw == src1w) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
@@ -1886,7 +1976,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
/* General version. */
if (FAST_IS_REG(dst) && dst != src2) {
EMIT_MOV(compiler, dst, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
}
else {
@@ -1898,7 +1988,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
else {
/* This version requires less memory writing. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
}
else {
@@ -1921,20 +2011,12 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
/* Register destination. */
- if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = IMUL_r_rm;
- }
- else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = IMUL_r_rm;
- }
- else if (src1 & SLJIT_IMM) {
- if (src2 & SLJIT_IMM) {
+ if (dst_r == src1 && src2 != SLJIT_IMM) {
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
+ } else if (dst_r == src2 && src1 != SLJIT_IMM) {
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
+ } else if (src1 == SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
src2 = dst_r;
src2w = 0;
@@ -1944,10 +2026,8 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
FAIL_IF(!inst);
*inst = IMUL_r_rm_i8;
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = U8(src1w);
+
+ FAIL_IF(emit_byte(compiler, U8(src1w)));
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
else {
@@ -1973,30 +2053,26 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
if (dst_r != src2)
EMIT_MOV(compiler, dst_r, 0, src2, src2w);
FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
}
#endif
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
/* Note: src1 is NOT immediate. */
if (src2w <= 127 && src2w >= -128) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
FAIL_IF(!inst);
*inst = IMUL_r_rm_i8;
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = U8(src2w);
+
+ FAIL_IF(emit_byte(compiler, U8(src2w)));
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
else {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
FAIL_IF(!inst);
*inst = IMUL_r_rm_i32;
+
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
@@ -2007,31 +2083,24 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
FAIL_IF(!inst);
*inst = IMUL_r_rm_i32;
+
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
- }
- else {
+ } else {
if (dst_r != src1)
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
}
#endif
- }
- else {
+ } else {
/* Neither argument is immediate. */
if (ADDRESSING_DEPENDS_ON(src2, dst_r))
dst_r = TMP_REG1;
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
}
if (dst & SLJIT_MEM)
@@ -2064,10 +2133,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
done = 1;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
FAIL_IF(!inst);
@@ -2077,10 +2146,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
}
else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
FAIL_IF(!inst);
@@ -2104,16 +2173,16 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
sljit_u8* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(CMP_EAX_i32, src2w);
return SLJIT_SUCCESS;
}
if (FAST_IS_REG(src1)) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
}
else {
@@ -2124,15 +2193,15 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
- if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
+ if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
FAIL_IF(!inst);
*inst = CMP_rm_r;
return SLJIT_SUCCESS;
}
- if (src2 & SLJIT_IMM) {
- if (src1 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
src1 = TMP_REG1;
src1w = 0;
@@ -2155,25 +2224,25 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
sljit_u8* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src2w);
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
- if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+ if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src1w);
return SLJIT_SUCCESS;
}
- if (!(src1 & SLJIT_IMM)) {
- if (src2 & SLJIT_IMM) {
+ if (src1 != SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src2w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
@@ -2201,8 +2270,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
}
}
- if (!(src2 & SLJIT_IMM)) {
- if (src1 & SLJIT_IMM) {
+ if (src2 != SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src1w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
@@ -2231,7 +2300,7 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
}
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src2w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
@@ -2269,18 +2338,18 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
#endif
sljit_u8* inst;
- if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
+ if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
if (dst == src1 && dstw == src1w) {
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return SLJIT_SUCCESS;
}
if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2288,14 +2357,14 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, dst, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return SLJIT_SUCCESS;
}
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2305,7 +2374,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
@@ -2323,7 +2392,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
@@ -2349,7 +2418,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
@@ -2372,7 +2441,7 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
sljit_s32 src2, sljit_sw src2w)
{
/* The CPU does not set flags if the shift count is 0. */
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
src2w &= compiler->mode32 ? 0x1f : 0x3f;
#else /* !SLJIT_CONFIG_X86_64 */
@@ -2437,7 +2506,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
if (!HAS_FLAGS(op)) {
- if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+ if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
if (FAST_IS_REG(dst) && src2 == dst) {
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
@@ -2459,6 +2528,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_cum_binary(compiler, BINARY_OPCODE(OR),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_XOR:
+ if (!HAS_FLAGS(op)) {
+ if (src2 == SLJIT_IMM && src2w == -1)
+ return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
+ if (src1 == SLJIT_IMM && src1w == -1)
+ return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
+ }
+
return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
@@ -2514,117 +2590,192 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
{
- sljit_s32 restore_ecx = 0;
- sljit_s32 is_rotate, is_left;
+ sljit_s32 is_rotate, is_left, move_src1;
sljit_u8* inst;
+ sljit_sw src1w = 0;
sljit_sw dstw = 0;
+ /* The whole register must be saved even for 32 bit operations. */
+ sljit_u8 restore_ecx = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP);
-#else /* !SLJIT_CONFIG_X86_32 */
- sljit_s32 tmp2 = TMP_REG2;
+ sljit_sw src2w = 0;
+ sljit_s32 restore_sp4 = 0;
#endif /* SLJIT_CONFIG_X86_32 */
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- CHECK_EXTRA_REGS(src1, src1w, (void)0);
- CHECK_EXTRA_REGS(src2, src2w, (void)0);
+ CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
+ CHECK_EXTRA_REGS(src3, src3w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op & SLJIT_32;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
- if (src2 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- src2w &= 0x1f;
+ src3w &= 0x1f;
#else /* !SLJIT_CONFIG_X86_32 */
- src2w &= (op & SLJIT_32) ? 0x1f : 0x3f;
+ src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
#endif /* SLJIT_CONFIG_X86_32 */
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
}
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
- is_rotate = (src_dst == src1);
- CHECK_EXTRA_REGS(src_dst, dstw, (void)0);
+ is_rotate = (src1_reg == src2_reg);
+ CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
+ CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
if (is_rotate)
- return emit_shift(compiler, is_left ? ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w);
+ return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
- if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) {
- if (!FAST_IS_REG(src1)) {
- EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- src1 = TMP_REG1;
- }
- } else if (FAST_IS_REG(src1)) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = 0;
-#endif
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = op & SLJIT_32;
-#endif
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
-
- if (src1 == SLJIT_PREF_SHIFT_REG)
- src1 = TMP_REG1;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (src2_reg & SLJIT_MEM) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
+ src2_reg = TMP_REG1;
+ }
+#endif /* SLJIT_CONFIG_X86_32 */
- if (src_dst == SLJIT_PREF_SHIFT_REG)
- src_dst = TMP_REG1;
+ if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
+ src1_reg = TMP_REG1;
+ src1w = 0;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (src2_reg != TMP_REG1) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
+ src1_reg = TMP_REG1;
+ src1w = 0;
+ } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
+ restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
+ EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
+ src1_reg = restore_sp4;
+ src1w = 0;
+ } else {
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
+ restore_sp4 = src1_reg;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
- restore_ecx = 1;
+ if (src3 != SLJIT_PREF_SHIFT_REG)
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
} else {
- EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = 0;
-#endif
- EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0);
+ compiler->mode32 = 0;
+#endif /* SLJIT_CONFIG_X86_64 */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = op & SLJIT_32;
-#endif
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+ compiler->mode32 = op & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+ src2_reg = TMP_REG1;
+ restore_ecx = 1;
+ }
- src1 = TMP_REG1;
+ move_src1 = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (dst_reg != src1_reg) {
+ if (dst_reg != src3) {
+ EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
+ src1_reg = dst_reg;
+ src1w = 0;
+ } else
+ move_src1 = 1;
+ }
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (dst_reg & SLJIT_MEM) {
+ if (src2_reg != TMP_REG1) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
+ src1_reg = TMP_REG1;
+ src1w = 0;
+ } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
+ restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
+ EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
+ src1_reg = restore_sp4;
+ src1w = 0;
+ } else {
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
+ restore_sp4 = src1_reg;
+ }
+ } else if (dst_reg != src1_reg) {
+ if (dst_reg != src3) {
+ EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
+ src1_reg = dst_reg;
+ src1w = 0;
+ } else
+ move_src1 = 1;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
- if (src_dst == SLJIT_PREF_SHIFT_REG) {
- src_dst = tmp2;
- SLJIT_ASSERT(dstw == 0);
+ if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
+ if (!restore_ecx) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 0;
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
+ compiler->mode32 = op & SLJIT_32;
+ restore_ecx = 1;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
+ restore_ecx = 1;
+ } else {
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
+ restore_ecx = 2;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
}
- restore_ecx = 2;
+ if (move_src1) {
+ EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
+ src1_reg = dst_reg;
+ src1w = 0;
+ }
}
- inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw);
+ inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
FAIL_IF(!inst);
inst[0] = GROUP_0F;
- if (src2 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
inst[1] = U8((is_left ? SHLD : SHRD) - 1);
- /* Immedate argument is added separately. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1);
- *inst = U8(src2w);
+ /* Immediate argument is added separately. */
+ FAIL_IF(emit_byte(compiler, U8(src3w)));
} else
inst[1] = U8(is_left ? SHLD : SHRD);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = 0;
-#endif
+ if (restore_ecx) {
+ compiler->mode32 = 0;
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+ }
- if (restore_ecx == 1)
- return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
- if (restore_ecx == 2)
- return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0);
+ if (src1_reg != dst_reg) {
+ compiler->mode32 = op & SLJIT_32;
+ return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
+ }
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (restore_ecx)
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
+
+ if (src1_reg != dst_reg)
+ EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
+
+ if (restore_sp4)
+ return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
+#endif /* SLJIT_CONFIG_X86_64 */
return SLJIT_SUCCESS;
}
@@ -2656,24 +2807,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
- return -1;
-#endif
- return reg_map[reg];
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ return emit_fast_enter(compiler, dst, dstw);
+ case SLJIT_GET_RETURN_ADDRESS:
+ return sljit_emit_get_return_address(compiler, dst, dstw);
+ }
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- return reg;
-#else
+ if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
+ return -1;
+#endif /* SLJIT_CONFIG_X86_32 */
+ return reg_map[reg];
+ }
+
+ if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
+ return -1;
+
return freg_map[reg];
-#endif
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -2701,6 +2869,8 @@ static sljit_u32 *sse2_buffer;
static void init_compiler(void)
{
+ get_cpu_features();
+
/* Align to 16 bytes. */
sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
@@ -2714,58 +2884,60 @@ static void init_compiler(void)
sse2_buffer[13] = 0x7fffffff;
}
-static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
- sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
+static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
+ sljit_uw op,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
- sljit_u8 *inst;
-
- inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+ sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = opcode;
+ inst[0] = GROUP_0F;
+ inst[1] = op & 0xff;
return SLJIT_SUCCESS;
}
-static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
- sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
+static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
+ sljit_uw op,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
- inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+ SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
+
+ inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = opcode;
+ inst[0] = GROUP_0F;
+ inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
+ inst[2] = op & 0xff;
return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
- return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
+ return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
}
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
- return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
+ return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
- sljit_u8 *inst;
+ sljit_s32 dst_r;
+
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
compiler->mode32 = 0;
#endif
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = CVTTSD2SI_r_xm;
+ FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
if (dst & SLJIT_MEM)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
@@ -2777,14 +2949,15 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
- sljit_u8 *inst;
+
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
@@ -2794,10 +2967,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
srcw = 0;
}
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
@@ -2812,16 +2982,28 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src2, sljit_sw src2w)
{
switch (GET_FLAG_TYPE(op)) {
+ case SLJIT_ORDERED_EQUAL:
+ /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
+ FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
+
+ /* EQ */
+ FAIL_IF(emit_byte(compiler, 0));
+
+ src1 = TMP_FREG;
+ src2 = TMP_FREG;
+ src2w = 0;
+ break;
+
case SLJIT_ORDERED_LESS:
- case SLJIT_UNORDERED_OR_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER:
- case SLJIT_ORDERED_LESS_EQUAL:
+ /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
if (!FAST_IS_REG(src2)) {
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
src2 = TMP_FREG;
}
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
+ return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
}
if (!FAST_IS_REG(src1)) {
@@ -2829,7 +3011,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
src1 = TMP_FREG;
}
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w);
+ return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2837,6 +3019,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r;
+ sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
@@ -2860,42 +3043,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
/* We overwrite the high bits of source. From SLJIT point of view,
this is not an issue.
Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
- FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0));
- }
- else {
+ FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
+ } else {
FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
src = TMP_FREG;
}
- FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0));
+ FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
if (dst_r == TMP_FREG)
return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
return SLJIT_SUCCESS;
}
if (FAST_IS_REG(dst)) {
- dst_r = dst;
- if (dst != src)
- FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
- }
- else {
- dst_r = TMP_FREG;
- FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
+ dst_r = (dst == src) ? TMP_FREG : dst;
+
+ if (src & SLJIT_MEM)
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
+
+ FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
+
+ inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
+ inst[0] = GROUP_0F;
+ /* Same as PSRLD_x / PSRLQ_x */
+ inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
+
+ if (GET_OPCODE(op) == SLJIT_ABS_F64) {
+ inst[2] |= 2 << 3;
+ FAIL_IF(emit_byte(compiler, 1));
+ } else {
+ inst[2] |= 6 << 3;
+ FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
+ }
+
+ if (dst_r != TMP_FREG)
+ dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
+ return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
}
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
+
switch (GET_OPCODE(op)) {
case SLJIT_NEG_F64:
- FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8)));
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
break;
case SLJIT_ABS_F64:
- FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12)));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
break;
}
- if (dst_r == TMP_FREG)
- return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
- return SLJIT_SUCCESS;
+ return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2938,19 +3136,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
switch (GET_OPCODE(op)) {
case SLJIT_ADD_F64:
- FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_SUB_F64:
- FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_MUL_F64:
- FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_DIV_F64:
- FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
}
@@ -2959,6 +3157,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+ sljit_uw pref;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif
+
+ if (dst_freg == src1) {
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
+ pref = EX86_SELECT_66(op) | EX86_SSE2;
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
+ return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
+ }
+
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
+ src1 = TMP_FREG;
+ src1w = 0;
+ }
+
+ if (dst_freg != src2)
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
+
+ pref = EX86_SELECT_66(op) | EX86_SSE2;
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
+ return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
+}
+
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
@@ -2980,9 +3217,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
inst = (sljit_u8*)ensure_buf(compiler, 2);
PTR_FAIL_IF(!inst);
-
- *inst++ = 0;
- *inst++ = 0;
+ inst[0] = 0;
+ inst[1] = 0;
return label;
}
@@ -3010,8 +3246,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
inst = (sljit_u8*)ensure_buf(compiler, 2);
PTR_FAIL_IF_NULL(inst);
- *inst++ = 0;
- *inst++ = 1;
+ inst[0] = 0;
+ inst[1] = 1;
return jump;
}
@@ -3042,8 +3278,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
inst = (sljit_u8*)ensure_buf(compiler, 2);
FAIL_IF_NULL(inst);
- *inst++ = 0;
- *inst++ = 1;
+ inst[0] = 0;
+ inst[1] = 1;
}
else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -3052,8 +3288,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
#endif
inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
FAIL_IF(!inst);
- *inst++ = GROUP_FF;
- *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
+ inst[0] = GROUP_FF;
+ inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
}
return SLJIT_SUCCESS;
}
@@ -3063,10 +3299,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 type)
{
sljit_u8 *inst;
- sljit_u8 cond_set = 0;
+ sljit_u8 cond_set;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 reg;
-#endif
+#endif /* SLJIT_CONFIG_X86_64 */
/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
sljit_s32 dst_save = dst;
sljit_sw dstw_save = dstw;
@@ -3086,13 +3322,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(!inst);
INC_SIZE(4 + 3);
/* Set low register to conditional flag. */
- *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | reg_lmap[TMP_REG1];
- *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
- *inst++ = OR_rm8_r8;
- *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
+ inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
+ inst[1] = GROUP_0F;
+ inst[2] = cond_set;
+ inst[3] = MOD_REG | reg_lmap[TMP_REG1];
+ inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
+ inst[5] = OR_rm8_r8;
+ inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
return SLJIT_SUCCESS;
}
@@ -3102,15 +3338,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(!inst);
INC_SIZE(4 + 4);
/* Set low register to conditional flag. */
- *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | reg_lmap[reg];
- *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
+ inst[0] = (reg_map[reg] <= 7) ? REX : REX_B;
+ inst[1] = GROUP_0F;
+ inst[2] = cond_set;
+ inst[3] = MOD_REG | reg_lmap[reg];
+ inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
/* The movzx instruction does not affect flags. */
- *inst++ = GROUP_0F;
- *inst++ = MOVZX_r_rm8;
- *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
+ inst[5] = GROUP_0F;
+ inst[6] = MOVZX_r_rm8;
+ inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
if (reg != TMP_REG1)
return SLJIT_SUCCESS;
@@ -3123,156 +3359,1311 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
-#else
+#else /* !SLJIT_CONFIG_X86_64 */
+ SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
+
/* The SLJIT_CONFIG_X86_32 code path starts here. */
- if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
- if (reg_map[dst] <= 4) {
- /* Low byte is accessible. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
- FAIL_IF(!inst);
- INC_SIZE(3 + 3);
- /* Set low byte to conditional flag. */
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = U8(MOD_REG | reg_map[dst]);
-
- *inst++ = GROUP_0F;
- *inst++ = MOVZX_r_rm8;
- *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
+ if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
+ /* Low byte is accessible. */
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(3 + 3);
+ /* Set low byte to conditional flag. */
+ inst[0] = GROUP_0F;
+ inst[1] = cond_set;
+ inst[2] = U8(MOD_REG | reg_map[dst]);
+
+ inst[3] = GROUP_0F;
+ inst[4] = MOVZX_r_rm8;
+ inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
+ return SLJIT_SUCCESS;
+ }
+
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(3 + 2);
+
+ /* Set low byte to conditional flag. */
+ inst[0] = GROUP_0F;
+ inst[1] = cond_set;
+ inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
+
+ inst[3] = OR_rm8_r8;
+ inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
+ return SLJIT_SUCCESS;
+ }
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(3 + 3);
+ /* Set low byte to conditional flag. */
+ inst[0] = GROUP_0F;
+ inst[1] = cond_set;
+ inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
+
+ inst[3] = GROUP_0F;
+ inst[4] = MOVZX_r_rm8;
+ inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
+
+ if (GET_OPCODE(op) < SLJIT_ADD)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
+}
+/*
+ * Emits a conditional select into dst_reg.
+ *
+ * Unless dst_reg already equals src2_reg, dst_reg is first loaded with one of
+ * the two inputs. A conditional move (emit_groupf when sljit_has_cpu_feature
+ * reports SLJIT_HAS_CMOV, emit_cmov_generic otherwise) then overwrites it with
+ * the other input. Whenever the two inputs are swapped, the low bit of `type`
+ * is toggled to compensate.
+ *
+ * compiler   - compiler instance; on x86-64 its mode32 field is set from
+ *              type & SLJIT_32.
+ * type       - condition, optionally combined with SLJIT_32 (the flag is
+ *              masked off before the condition is used).
+ * dst_reg    - destination register.
+ * src1/src1w - first source operand; an immediate is moved to TMP_REG1 first.
+ * src2_reg   - second source register.
+ *
+ * Returns SLJIT_SUCCESS on the normal path. NOTE(review): CHECK_ERROR, CHECK,
+ * FAIL_IF and EMIT_MOV are defined outside this view and presumably return an
+ * error code early - confirm.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
+{
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_s32 dst = dst_reg;
+ sljit_sw dstw = 0;
+#endif /* SLJIT_CONFIG_X86_32 */
+ sljit_sw src2w = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+ CHECK_EXTRA_REGS(src1, src1w, (void)0);
+ CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = type & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+ type &= ~SLJIT_32;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (dst & SLJIT_MEM) { /* dst is a memory operand here: compute in TMP_REG1, store it at the end. */
+ if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ src1 = src2_reg;
+ src1w = src2w;
+ type ^= 0x1;
+ } else
+ EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
+
+ dst_reg = TMP_REG1;
+ } else {
+#endif /* SLJIT_CONFIG_X86_32 */
+ if (dst_reg != src2_reg) {
+ if (dst_reg == src1) { /* dst_reg already holds src1: swap the roles instead of moving. */
+ src1 = src2_reg;
+ src1w = src2w;
+ type ^= 0x1;
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { /* Loading src2_reg first would clobber a register used to address src1. */
+ EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
+ src1 = src2_reg;
+ src1w = src2w;
+ type ^= 0x1;
+ } else
+ EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w);
+ }
+ }
+
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
+ SLJIT_ASSERT(dst_reg != TMP_REG1);
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ src1 = TMP_REG1;
+ src1w = 0;
+ }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ }
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+ FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); /* NOTE(review): presumably get_jump_code() yields the long Jcc opcode byte, so -0x40 gives CMOVcc - confirm. */
+ else
+ FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (dst_reg == TMP_REG1)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_32 */
+ return SLJIT_SUCCESS;
+}
+/*
+ * Floating point conditional select into dst_freg.
+ *
+ * dst_freg is first made to hold src2_freg (or, when dst_freg == src1, the
+ * inputs are swapped and the low bit of `type` is toggled). Two bytes are then
+ * reserved: the first is derived from get_jump_code() on `type` with its low
+ * bit toggled, the second is patched afterwards with the number of bytes
+ * emitted for the load of src1 into dst_freg.
+ * NOTE(review): this looks like a short Jcc on the opposite condition that
+ * skips the load - confirm against get_jump_code().
+ *
+ * type & SLJIT_32 is forwarded to emit_sse2_load (presumably selecting single
+ * precision - confirm). On x86-64 compiler->mode32 is set to 1.
+ *
+ * Returns SLJIT_SUCCESS on the normal path.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_u8* inst;
+ sljit_uw size;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
+ }
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
+
+ size = compiler->size; /* Remember the size so the length of the following load can be computed. */
+ FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
+
+ inst[1] = U8(compiler->size - size); /* Patch the second reserved byte with the byte length of the load. */
+ return SLJIT_SUCCESS;
+}
+/*
+ * Emits a whole-register SIMD move between freg and srcdst.
+ *
+ * type encodes the register size, the element size and (in the ELEM2 field)
+ * the alignment, plus the SLJIT_SIMD_FLOAT / SLJIT_SIMD_STORE /
+ * SLJIT_SIMD_TEST flags.
+ *  - register size 4 uses the plain SSE2 encoding, size 5 needs
+ *    CPU_FEATURE_AVX2 and the VEX_256 encoding; anything else is unsupported.
+ *  - float moves accept element sizes 2 and 3 only.
+ *  - the aligned opcode form is chosen when alignment >= reg_size.
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED for the unsupported combinations above,
+ * SLJIT_SUCCESS without emitting anything when SLJIT_SIMD_TEST is set, and
+ * otherwise the result of emit_vex_instruction / emit_groupf.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_uw op;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ switch (reg_size) {
+ case 4:
+ op = EX86_SSE2;
+ break;
+ case 5:
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ op = EX86_SSE2 | VEX_256;
+ break;
+ default:
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+
+ if (!(srcdst & SLJIT_MEM))
+ alignment = reg_size; /* Register operand: always take the aligned opcode form below. */
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 2 || elem_size == 3) {
+ op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
+
+ if (elem_size == 3)
+ op |= EX86_PREF_66;
+
+ if (type & SLJIT_SIMD_STORE)
+ op += 1; /* NOTE(review): relies on the store opcode being the load opcode + 1 - confirm. */
+ } else
+ return SLJIT_ERR_UNSUPPORTED;
+ } else {
+ op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
+ | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3); /* NOTE(review): the F3 prefix presumably turns MOVDQA into MOVDQU - confirm. */
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS; /* SLJIT_SIMD_TEST: report success without emitting code. */
+
+ if (op & VEX_256)
+ return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw);
+
+ return emit_groupf(compiler, op, freg, srcdst, srcdstw);
+}
+/*
+ * Emits code that fills freg from a single scalar source (register, memory or
+ * immediate), going by the function name and the broadcast / shuffle opcodes
+ * used.
+ *
+ * Outline of the code paths:
+ *  - With CPU_FEATURE_AVX2 and a non-immediate source, a VPBROADCAST* or
+ *    VBROADCASTS* instruction is emitted (integer registers are first moved
+ *    into freg with MOVD). 128 bit double precision floats are excluded from
+ *    this path and use MOVDDUP below.
+ *  - Float sources otherwise use SHUFPS / MOVDDUP; a float immediate emits
+ *    XORPD freg, freg.
+ *  - Integer immediates 0 and -1 are produced with PXOR / PCMPEQD; other
+ *    immediates are loaded into TMP_REG1 first.
+ *  - The remaining integer path inserts the value with MOVD / PINSRB / PINSRW
+ *    and spreads it with PSHUFB, PSHUFLW and PSHUFD (or VPBROADCASTD/Q for
+ *    256 bit registers).
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED for element / register sizes rejected below,
+ * and SLJIT_SUCCESS without emitting code when SLJIT_SIMD_TEST is set.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_u8 *inst;
+ sljit_u8 opcode = 0;
+ sljit_uw size;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+ }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#else /* !SLJIT_CONFIG_X86_32 */
+ compiler->mode32 = 1;
+
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ if (cpu_feature_list & CPU_FEATURE_AVX2) {
+ if (reg_size < 4 || reg_size > 5)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (src != SLJIT_IMM && (reg_size == 5 || elem_size < 3 || !(type & SLJIT_SIMD_FLOAT))) {
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size >= 3)
+ compiler->mode32 = 0;
+#endif /* SLJIT_CONFIG_X86_64 */
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, src, srcw)); /* Integer register source: move it into freg, then broadcast from freg. */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+ src = freg;
+ srcw = 0;
+ }
+
+ switch (elem_size) {
+ case 0:
+ size = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 1:
+ size = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 2:
+ size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSS_x_xm : VPBROADCASTD_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ default:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ size = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+#else /* !SLJIT_CONFIG_X86_32 */
+ size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSD_x_xm : VPBROADCASTQ_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+#endif /* SLJIT_CONFIG_X86_32 */
+ break;
+ }
+
+ if (reg_size == 5)
+ size |= VEX_256;
+
+ return emit_vex_instruction(compiler, size, freg, 0, src, srcw);
+ }
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (src == SLJIT_IMM) {
+ if (reg_size == 5)
+ return emit_vex_instruction(compiler, XORPD_x_xm | VEX_256 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
+
+ return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0); /* NOTE(review): float immediates only zero freg; srcw is not read on this path. */
+ }
+
+ if (elem_size == 2 && freg != src) {
+ FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
+ src = freg;
+ srcw = 0;
+ }
+
+ FAIL_IF(emit_groupf(compiler, (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2, freg, src, srcw));
+
+ if (elem_size == 2)
+ return emit_byte(compiler, 0); /* Immediate byte of SHUFPS. */
+ return SLJIT_SUCCESS;
+ }
+
+ if (src == SLJIT_IMM) {
+ if (elem_size == 0) {
+ srcw = (sljit_u8)srcw;
+ srcw |= srcw << 8;
+ srcw |= srcw << 16;
+ elem_size = 2; /* The byte was replicated to 32 bits above; continue as a 32 bit element. */
+ } else if (elem_size == 1) {
+ srcw = (sljit_u16)srcw;
+ srcw |= srcw << 16;
+ elem_size = 2;
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 2 && (sljit_s32)srcw == -1)
+ srcw = -1; /* Normalize a 32 bit all-ones value so the srcw == -1 test below matches. */
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (srcw == 0 || srcw == -1) {
+ if (reg_size == 5)
+ return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
+
+ return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); /* All-zero / all-one patterns are built from freg itself, no general purpose register needed. */
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 3)
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+ else
+#endif /* SLJIT_CONFIG_X86_64 */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ size = 2;
+ opcode = MOVD_x_rm;
+
+ switch (elem_size) {
+ case 0:
+ if (!FAST_IS_REG(src)) {
+ opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
+ size = 3;
+ }
+ break;
+ case 1:
+ if (!FAST_IS_REG(src))
+ opcode = PINSRW_x_rm_i8;
+ break;
+ case 2:
+ break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ /* MOVQ */
+ compiler->mode32 = 0;
+ break;
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+
+ inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
+ FAIL_IF(!inst);
+ inst[0] = GROUP_0F;
+ inst[1] = opcode;
+
+ if (reg_size == 5) {
+ SLJIT_ASSERT(opcode == MOVD_x_rm);
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ size = VPBROADCASTD_x_xm;
+#else /* !SLJIT_CONFIG_X86_32 */
+ size = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
+#endif /* SLJIT_CONFIG_X86_32 */
+ return emit_vex_instruction(compiler, size | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+ }
+
+ if (size == 3) {
+ SLJIT_ASSERT(opcode == 0x3a);
+ inst[2] = PINSRB_x_rm_i8;
+ }
+
+ if (opcode != MOVD_x_rm)
+ FAIL_IF(emit_byte(compiler, 0)); /* Immediate byte (0) of the PINSRB / PINSRW emitted above. */
+
+ switch (elem_size) {
+ case 0:
+ FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
+ return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); /* PSHUFB with the zeroed TMP_FREG as its shuffle mask. */
+ case 1:
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
+ FAIL_IF(emit_byte(compiler, 0));
+ /* fallthrough */
+ default:
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
+ return emit_byte(compiler, 0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ compiler->mode32 = 1;
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
+ return emit_byte(compiler, 0x44); /* PSHUFD immediate 0x44 selects dwords 0,1,0,1. */
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+}
+/*
+ * Emits a move of one SIMD lane between freg and srcdst.
+ *
+ * Flags in type handled by the visible code:
+ *  - SLJIT_SIMD_STORE selects the extract style opcodes (PEXTR*, MOVLPD_m_x,
+ *    EXTRACTPS, ...); without it the insert style opcodes are used (PINSR*,
+ *    MOVLPD_x_m, INSERTPS, ...).
+ *  - SLJIT_SIMD_LANE_ZERO clears the target register with XORPD / PXOR before
+ *    the lane is written; lane 0 has shortcuts using MOVD / MOVSD / MOVQ.
+ *  - SLJIT_SIMD_LANE_SIGNED adds a MOVSX / MOVSXD after an integer lane has
+ *    been extracted into a register.
+ *  - SLJIT_SIMD_TEST returns SLJIT_SUCCESS once the sizes have been validated.
+ *
+ * For 256 bit registers (reg_size == 5) a lane in the upper half is handled
+ * through TMP_FREG: the upper half is extracted with VEXTRACTF128 /
+ * VEXTRACTI128, and after an insert it is merged back into freg_orig with
+ * VINSERTF128 / VINSERTI128 (or VPERMPD / VPERMQ when the register was
+ * zeroed).
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED when reg_size is not 4 or 5, when reg_size is
+ * 5 without CPU_FEATURE_AVX2, or when elem_size is outside the range checked
+ * below.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_u8 *inst;
+ sljit_u8 opcode = 0;
+ sljit_uw size;
+ sljit_s32 freg_orig = freg;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_s32 srcdst_is_ereg = 0;
+ sljit_s32 srcdst_orig = 0;
+ sljit_sw srcdstw_orig = 0;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
+ return SLJIT_ERR_UNSUPPORTED;
+#else /* SLJIT_CONFIG_X86_32 */
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
+
+ if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
+ srcdst_orig = srcdst;
+ srcdstw_orig = srcdstw;
+ srcdst = TMP_REG1; /* Extract into TMP_REG1; the original destination is filled in at the end. */
+ srcdstw = 0;
+ }
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ if (lane_index == 0) {
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 3) {
+ compiler->mode32 = 0;
+ elem_size = 2;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size == 0)
+ srcdstw = (sljit_u8)srcdstw;
+ else if (elem_size == 1)
+ srcdstw = (sljit_u16)srcdstw;
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
+ srcdst = TMP_REG1;
+ srcdstw = 0;
+ elem_size = 2;
+ }
+
+ if (elem_size == 2) {
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
+ return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
+ }
+ } else if (srcdst & SLJIT_MEM) {
+ SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
+
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
+ return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
+ } else if (elem_size == 3) {
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
+ return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
+ }
+ }
+
+ if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
+ freg = TMP_FREG; /* Lane is in the upper 128 bit half: build it in TMP_FREG, merge into freg_orig later. */
+ lane_index -= (1 << (4 - elem_size));
+ } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); /* Save the source before freg is cleared below. */
+ srcdst = TMP_FREG;
+ srcdstw = 0;
+ }
+
+ size = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
+ | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
+
+ if (reg_size == 5)
+ FAIL_IF(emit_vex_instruction(compiler, size | VEX_256 | VEX_SSE2_OPV, freg, freg, freg, 0));
+ else
+ FAIL_IF(emit_groupf(compiler, size, freg, freg, 0));
+ } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
+ FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
+ FAIL_IF(emit_byte(compiler, 1)); /* Immediate 1 for the VEXTRACT*128 above. */
+
+ freg = TMP_FREG;
+ lane_index -= (1 << (4 - elem_size));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3) {
+ if (srcdst & SLJIT_MEM) {
+ if (type & SLJIT_SIMD_STORE)
+ size = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
+ else
+ size = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
+
+ FAIL_IF(emit_groupf(compiler, size | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
+
+ /* In case of store, freg is not TMP_FREG. */
+ } else if (type & SLJIT_SIMD_STORE) {
+ if (lane_index == 1)
+ return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0);
+ return emit_sse2_load(compiler, 0, srcdst, freg, 0);
+ } else {
+ if (lane_index == 1)
+ FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0));
+ else
+ FAIL_IF(emit_sse2_store(compiler, 0, freg, 0, srcdst));
+ }
+ } else if (type & SLJIT_SIMD_STORE) {
+ if (lane_index == 0)
+ return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
+
+ if (srcdst & SLJIT_MEM) {
+ FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
+ return emit_byte(compiler, U8(lane_index));
+ }
+
+ if (srcdst == freg)
+ size = SHUFPS_x_xm | EX86_SSE2;
+ else {
+ if (cpu_feature_list & CPU_FEATURE_AVX) {
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
+ return emit_byte(compiler, U8(lane_index));
+ }
+
+ switch (lane_index) {
+ case 1:
+ size = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
+ break;
+ case 2:
+ size = MOVHLPS_x_x | EX86_SSE2;
+ break;
+ default:
+ SLJIT_ASSERT(lane_index == 3);
+ size = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
+ break;
+ }
+ }
+
+ FAIL_IF(emit_groupf(compiler, size, srcdst, freg, 0));
+
+ size &= 0xff; /* Keep only the low byte so it can be compared with the opcode constants. */
+ if (size == SHUFPS_x_xm || size == PSHUFD_x_xm)
+ return emit_byte(compiler, U8(lane_index));
+
return SLJIT_SUCCESS;
+ } else {
+ if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
+ FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
+ FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
+ } else
+ FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
}
- /* Low byte is not accessible. */
- if (cpu_feature_list == 0)
- get_cpu_features();
+ if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
+ return SLJIT_SUCCESS;
- if (cpu_feature_list & CPU_FEATURE_CMOV) {
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
- /* a xor reg, reg operation would overwrite the flags. */
- EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
+ SLJIT_ASSERT(reg_size == 5);
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
- FAIL_IF(!inst);
- INC_SIZE(3);
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
+ return emit_byte(compiler, 0x4e); /* VPERMPD immediate 0x4e: qwords 2,3,0,1, i.e. the 128 bit halves swapped. */
+ }
+
+ FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
+ return emit_byte(compiler, 1);
+ }
- *inst++ = GROUP_0F;
- /* cmovcc = setcc - 0x50. */
- *inst++ = U8(cond_set - 0x50);
- *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]);
+ if (srcdst == SLJIT_IMM) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
+ srcdst = TMP_REG1;
+ srcdstw = 0;
+ }
+
+ size = 3;
+
+ switch (elem_size) {
+ case 0:
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
+ break;
+ case 1:
+ if (!(type & SLJIT_SIMD_STORE)) {
+ size = 2;
+ opcode = PINSRW_x_rm_i8;
+ } else
+ opcode = PEXTRW_rm_x_i8;
+ break;
+ case 2:
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
+ break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ /* PINSRQ / PEXTRQ */
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
+ compiler->mode32 = 0;
+ break;
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+
+ inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
+ FAIL_IF(!inst);
+ inst[0] = GROUP_0F;
+
+ if (size == 3) {
+ inst[1] = 0x3a;
+ inst[2] = opcode;
+ } else
+ inst[1] = opcode;
+
+ FAIL_IF(emit_byte(compiler, U8(lane_index))); /* Immediate byte: the lane index. */
+
+ if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
+ if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
+ SLJIT_ASSERT(reg_size == 5);
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
+ return emit_byte(compiler, 0x4e);
+ }
+
+ FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
+ return emit_byte(compiler, 1);
+ }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (srcdst_orig & SLJIT_MEM)
+ return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_32 */
+ return SLJIT_SUCCESS;
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size >= 3)
+ return SLJIT_SUCCESS; /* No sign extension is emitted for 64 bit lanes. */
+
+ compiler->mode32 = (type & SLJIT_32);
+
+ size = 2;
+
+ if (elem_size == 0)
+ size |= EX86_REX;
+
+ if (elem_size == 2) {
+ if (type & SLJIT_32)
return SLJIT_SUCCESS;
+
+ SLJIT_ASSERT(!(compiler->mode32));
+ size = 1;
+ }
+
+ inst = emit_x86_instruction(compiler, size, srcdst, 0, srcdst, 0);
+ FAIL_IF(!inst);
+
+ if (size != 1) {
+ inst[0] = GROUP_0F;
+ inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
+ } else
+ inst[0] = MOVSXD_r_rm;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (elem_size >= 2)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
+ (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
+
+ if (srcdst_orig & SLJIT_MEM)
+ return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
+ return SLJIT_SUCCESS;
+}
+/*
+ * Emits code that fills freg from lane src_lane_index of the SIMD register
+ * src (going by the function name and the broadcast / shuffle opcodes used).
+ *
+ * Outline of the code paths:
+ *  - Float: element sizes 2 and 3 only. Lane 0 uses VBROADCASTSS /
+ *    VBROADCASTSD / MOVDDUP where available, other lanes use SHUFPS (with the
+ *    66 prefix for element size 3) preceded by VPERMPD for 256 bit registers.
+ *  - Byte elements: the requested byte is first brought into position with
+ *    VPERMQ, PSHUFLW or PSRLDQ, then spread with VPBROADCASTB (AVX2) or
+ *    PSHUFB using a zeroed TMP_FREG.
+ *  - Wider integer elements: VPBROADCASTW/D/Q for lane 0 with AVX2, otherwise
+ *    PSHUFLW / PSHUFD combinations, plus VPERMQ / VPBROADCASTQ for 256 bit
+ *    registers.
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED when reg_size is not 4 or 5, when reg_size is
+ * 5 without CPU_FEATURE_AVX2, or for float element sizes other than 2 and 3.
+ * With SLJIT_SIMD_TEST set, SLJIT_SUCCESS is returned without emitting code.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_uw pref;
+ sljit_u8 byte;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_s32 opcode3 = TMP_REG1;
+#else /* !SLJIT_CONFIG_X86_32 */
+ sljit_s32 opcode3 = SLJIT_S0;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+ SLJIT_ASSERT(reg_map[opcode3] == 3); /* NOTE(review): opcode3 is passed as the register operand of PSRLDQ_x below; its reg_map value 3 presumably encodes the /3 opcode extension - confirm. */
+
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ pref = 0;
+ byte = U8(src_lane_index);
+
+ if (elem_size == 3) {
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 5) {
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
+
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+
+ byte = U8(byte | (byte << 2));
+ return emit_byte(compiler, U8(byte | (byte << 4))); /* The 2 bit lane index repeated in all four fields of the immediate. */
+ }
+
+ if (src_lane_index == 0)
+ return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0);
+
+ /* Changes it to SHUFPD_x_xm. */
+ pref = EX86_PREF_66;
+ } else if (elem_size != 2)
+ return SLJIT_ERR_UNSUPPORTED;
+ else if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 5) {
+ SLJIT_ASSERT(elem_size == 2);
+
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
+
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+
+ byte = 0x44;
+ if (src_lane_index >= 4) {
+ byte = 0xee;
+ src_lane_index -= 4;
+ }
+
+ FAIL_IF(emit_byte(compiler, byte)); /* VPERMPD immediate: 0x44 = qwords 0,1,0,1; 0xee = qwords 2,3,2,3. */
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
+ byte = U8(src_lane_index);
+ } else if (freg != src && (cpu_feature_list & CPU_FEATURE_AVX)) {
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
+ } else {
+ if (freg != src)
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0));
+
+ FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0));
}
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1 + 3 + 3 + 1);
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
- /* Set al to conditional flag. */
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | 0 /* eax */;
-
- *inst++ = GROUP_0F;
- *inst++ = MOVZX_r_rm8;
- *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */);
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
+ if (elem_size == 2) {
+ byte = U8(byte | (byte << 2));
+ byte = U8(byte | (byte << 4));
+ } else
+ byte = U8(byte | (byte << 1));
+
+ return emit_byte(compiler, U8(byte)); /* Immediate byte of the SHUFPS / SHUFPD emitted above. */
+ }
+
+ if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
+
+ if (elem_size == 0) {
+ if (reg_size == 5 && src_lane_index >= 16) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa)); /* VPERMQ immediate: 0xff replicates qword 3, 0xaa replicates qword 2. */
+ src_lane_index &= 0x7;
+ src = freg;
+ }
+
+ if ((freg != src && !(cpu_feature_list & CPU_FEATURE_AVX2)) || src_lane_index != 0) {
+ pref = 0;
+
+ if ((src_lane_index & 0x3) == 0) {
+ pref = EX86_PREF_66;
+ byte = U8(src_lane_index >> 2);
+ } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
+ pref = EX86_PREF_F2;
+ byte = U8(src_lane_index >> 1);
+ } else {
+ if (freg == src || !(cpu_feature_list & CPU_FEATURE_AVX2)) {
+ if (freg != src)
+ FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
+
+ FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
+ } else
+ FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));
+
+ FAIL_IF(emit_byte(compiler, U8(src_lane_index))); /* PSRLDQ immediate: shift right by src_lane_index bytes. */
+ }
+
+ if (pref != 0) {
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
+ FAIL_IF(emit_byte(compiler, byte));
+ }
+
+ src = freg;
+ }
+
+ if (cpu_feature_list & CPU_FEATURE_AVX2)
+ return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
+
+ SLJIT_ASSERT(reg_size == 4);
+ FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
+ return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
}
- if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
- SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
+ if ((cpu_feature_list & CPU_FEATURE_AVX2) && src_lane_index == 0 && elem_size <= 3) {
+ switch (elem_size) {
+ case 1:
+ pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 2:
+ pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ default:
+ pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ }
- if (dst != SLJIT_R0) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1 + 3 + 2 + 1);
- /* Set low register to conditional flag. */
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | 0 /* eax */;
- *inst++ = OR_rm8_r8;
- *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
+ if (reg_size == 5)
+ pref |= VEX_256;
+
+ return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
+ }
+
+ if (reg_size == 5) {
+ switch (elem_size) {
+ case 1:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 2;
+ pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
+ break;
+ case 2:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 1;
+ pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
+ break;
+ case 3:
+ pref = 0;
+ break;
+ default:
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
}
- else {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
+
+ if (pref != 0) {
+ FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
+ byte = U8(byte | (byte << 2));
+ FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+
+ src = freg;
+ }
+
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ byte = U8(src_lane_index); /* src_lane_index is a qword index at this point (shifted above for 16/32 bit elements). */
+ byte = U8(byte | (byte << 2));
+ return emit_byte(compiler, U8(byte | (byte << 4)));
+ }
+
+ switch (elem_size) {
+ case 1:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 1;
+ pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
+
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
+ byte = U8(byte | (byte << 2));
+ FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+
+ if ((cpu_feature_list & CPU_FEATURE_AVX2) && pref == EX86_PREF_F2)
+ return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+
+ src = freg;
+ /* fallthrough */
+ case 2:
+ byte = U8(src_lane_index);
+ byte = U8(byte | (byte << 2));
+ break;
+ default:
+ byte = U8(src_lane_index << 1);
+ byte = U8(byte | (byte << 2) | 0x4);
+ break;
+ }
+
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
+ return emit_byte(compiler, U8(byte | (byte << 4))); /* The low nibble pattern is repeated in the high nibble of the PSHUFD immediate. */
+}
+/*
+ * Emits a SIMD element widening from elem_size to elem2_size elements.
+ *
+ *  - Float (SLJIT_SIMD_FLOAT): only elem_size 2 -> elem2_size 3 is accepted
+ *    and is emitted as CVTPS2PD.
+ *  - Integer: the PMOVSX* opcode is chosen when SLJIT_SIMD_EXTEND_SIGNED is
+ *    set and the PMOVZX* opcode otherwise; the accepted combinations are
+ *    0 -> 1/2/3, 1 -> 2/3 and 2 -> 3.
+ *  - reg_size 4 uses the non-VEX encoders (emit_groupf / emit_groupf_ext);
+ *    reg_size 5 requires CPU_FEATURE_AVX2 and uses emit_vex_instruction with
+ *    VEX_256.
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED for any other size combination, and
+ * SLJIT_SUCCESS without emitting code when SLJIT_SIMD_TEST is set.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_u8 opcode;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size != 2 || elem2_size != 3)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw);
+ return emit_vex_instruction(compiler, CVTPS2PD_x_xm | VEX_256 | EX86_SSE2, freg, 0, src, srcw);
+ }
+
+ switch (elem_size) { /* Pick the opcode for the (elem_size, elem2_size) pair; unlisted pairs are rejected. */
+ case 0:
+ if (elem2_size == 1)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
+ else if (elem2_size == 2)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
+ else if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ case 1:
+ if (elem2_size == 2)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
+ else if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ case 2:
+ if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ default:
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS; /* SLJIT_SIMD_TEST: the combination is accepted, nothing is emitted. */
+
+ if (reg_size == 4)
+ return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
+ return emit_vex_instruction(compiler, opcode | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{ /* Emits a PMOVMSKB / MOVMSKPS family instruction that extracts a bit mask from freg, then stores the mask into dst. */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 dst_r; /* general register that receives the mask */
+ sljit_uw pref; /* prefix/flag bits; in the reg_size 5 path it also carries the opcode */
+ sljit_u8 *inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) /* float types are accepted only with elem_size 2 or 3 */
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (reg_size == 4) {
+ if (type & SLJIT_SIMD_TEST) /* test request: report support without emitting code */
+ return SLJIT_SUCCESS;
+
+ pref = EX86_PREF_66 | EX86_SSE2_OP2;
+
+ switch (elem_size) {
+ case 1:
+ FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); /* pack freg into TMP_FREG so the byte mask instruction below can be used */
+ freg = TMP_FREG;
+ break;
+ case 2:
+ pref = EX86_SSE2_OP2; /* elem_size 2 uses MOVMSKPS without EX86_PREF_66 */
+ break;
+ }
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* go through TMP_REG1 when dst is not a plain register */
+ FAIL_IF(emit_groupf(compiler, (elem_size < 2 ? PMOVMSKB_r_x : MOVMSKPS_r_x) | pref, dst_r, freg, 0));
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = type & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (elem_size == 1) {
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
 FAIL_IF(!inst);
- INC_SIZE(2 + 3 + 2 + 2);
- /* Set low register to conditional flag. */
- *inst++ = XCHG_r_rm;
- *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | 1 /* ecx */;
- *inst++ = OR_rm8_r8;
- *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
- *inst++ = XCHG_r_rm;
- *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
+ inst[1] |= SHR; /* SHR dst_r by 8; NOTE(review): presumably drops the mask bits that come from TMP_FREG's previous contents - confirm */
 }
+
+ if (dst_r == TMP_REG1)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0); /* copy the mask from the scratch register to the real destination */
+
 return SLJIT_SUCCESS;
 }
- /* Set TMP_REG1 to the bit. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
- FAIL_IF(!inst);
- INC_SIZE(1 + 3 + 3 + 1);
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
- /* Set al to conditional flag. */
- *inst++ = GROUP_0F;
- *inst++ = cond_set;
- *inst++ = MOD_REG | 0 /* eax */;
+ if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)) /* from here on only reg_size 5 with AVX2 is handled */
+ return SLJIT_ERR_UNSUPPORTED;
- *inst++ = GROUP_0F;
- *inst++ = MOVZX_r_rm8;
- *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
+ if (type & SLJIT_SIMD_TEST) /* test request: report support without emitting code */
+ return SLJIT_SUCCESS;
- *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* go through TMP_REG1 when dst is not a plain register */
- if (GET_OPCODE(op) < SLJIT_ADD)
+ if (elem_size == 1) { /* three step sequence: VEXTRACTI128 with immediate 1, PACKSSWB, then PMOVMSKB on TMP_FREG */
+ FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
+ FAIL_IF(emit_byte(compiler, 1)); /* immediate operand of the VEXTRACTI128 above */
+ FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
+ FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
+ } else {
+ pref = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; /* default: elem_size 2 */
+
+ if (elem_size == 0)
+ pref = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
+ else if (elem_size == 3)
+ pref |= EX86_PREF_66; /* MOVMSKPS opcode plus EX86_PREF_66 for elem_size 3 */
+
+ FAIL_IF(emit_vex_instruction(compiler, pref, dst_r, 0, freg, 0));
+ }
+
+ if (dst_r == TMP_REG1) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = type & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+ }
- SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src_freg)
+{ /* Emits a register-to-register SIMD move from src_freg to dst_freg; only reg_size 4 is accepted (asserted below). */
+ sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2; /* MOVAPS opcode for float types, MOVDQA otherwise */
+
+ SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
+
+ if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3) /* EX86_PREF_66 is added for integer types and for float elem_size 3 */
+ op |= EX86_PREF_66;
+
+ return emit_groupf(compiler, op, dst_freg, src_freg, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{ /* Emits a bitwise SIMD AND / OR / XOR of src1_freg and src2_freg with the result in dst_freg. */
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 needs_move = 0; /* set when dst_freg differs from both sources */
+ sljit_uw op = 0; /* NOTE(review): stays 0 if the opcode matches none of the cases below - confirm the checker rules that out */
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
 #endif /* SLJIT_CONFIG_X86_64 */
+
+ if (reg_size == 5) { /* reg_size 5 is emitted with VEX_256 below and needs AVX2 */
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4) /* only reg_size 4 and 5 are handled */
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) /* float types are accepted only with elem_size 2 or 3 */
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) { /* each case picks the float or integer opcode; EX86_PREF_66 is added for integer types and float elem_size 3 */
+ case SLJIT_SIMD_OP2_AND:
+ op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
+
+ if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
+ op |= EX86_PREF_66;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
+
+ if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
+ op |= EX86_PREF_66;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
+
+ if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
+ op |= EX86_PREF_66;
+ break;
+ }
+
+ if (type & SLJIT_SIMD_TEST) /* test request: report support without emitting code */
+ return SLJIT_SUCCESS;
+
+ needs_move = dst_freg != src1_freg && dst_freg != src2_freg;
+
+ if (reg_size == 5 || (needs_move && (cpu_feature_list & CPU_FEATURE_AVX2))) { /* VEX form receives dst and both sources, so no separate move is emitted */
+ if (reg_size == 5)
+ op |= VEX_256;
+
+ return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0);
+ }
+
+ if (needs_move) { /* two operand form: copy src1 into dst first, then combine with src2 */
+ FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg));
+ } else if (dst_freg != src1_freg) { /* dst aliases src2: swap the roles, which is valid because AND / OR / XOR are commutative */
+ SLJIT_ASSERT(dst_freg == src2_freg);
+ src2_freg = src1_freg;
+ }
+
+ FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0));
+ return SLJIT_SUCCESS;
 }
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
 sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 mem_reg)
 {
- sljit_u8* inst;
+ CHECK_ERROR(); /* the load half of the atomic pair needs no dedicated instruction here; it is forwarded to sljit_emit_op1 below */
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+ SLJIT_SKIP_CHECKS(compiler); /* arguments were already checked above, before calling another public entry point */
+ return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); /* op from [mem_reg] with offset 0 into dst_reg */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{ /* Emits LOCK CMPXCHG [mem_reg], src_reg; temp_reg is moved into SLJIT_R0 first and SLJIT_R0 is restored afterwards. */
+ sljit_uw pref;
+ sljit_s32 free_reg = TMP_REG1; /* location used to save SLJIT_R0 while it holds temp_reg */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_sw srcw = 0;
+ sljit_sw tempw = 0;
+#endif /* SLJIT_CONFIG_X86_32 */
 CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+ CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
+ CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
+
+ SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
+ SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
+ op = GET_OPCODE(op);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- type &= ~SLJIT_32;
+ if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
+ /* Src is virtual register or its low byte is not accessible. */
+ SLJIT_ASSERT(src_reg != SLJIT_R1);
+ free_reg = src_reg; /* the original src location becomes the save slot once its value is copied to TMP_REG1 */
- if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
-#else
- if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
-#endif
+ EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
+ src_reg = TMP_REG1;
- /* ADJUST_LOCAL_OFFSET is not needed. */
- CHECK_EXTRA_REGS(src, srcw, (void)0);
+ if (mem_reg == src_reg)
+ mem_reg = TMP_REG1;
+ }
+#endif /* SLJIT_CONFIG_X86_32 */
+ if (temp_reg != SLJIT_R0) { /* temp_reg has to be in SLJIT_R0 for the CMPXCHG below; save the current R0 first */
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = type & SLJIT_32;
- type &= ~SLJIT_32;
-#endif
+ compiler->mode32 = 0;
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
- src = TMP_REG1;
- srcw = 0;
+ EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); /* save R0 */
+ EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0); /* load temp_reg into R0 */
+
+ if (src_reg == SLJIT_R0) /* operands that lived in R0 are now found in free_reg */
+ src_reg = free_reg;
+ if (mem_reg == SLJIT_R0)
+ mem_reg = free_reg;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) { /* TMP_REG1 holds src and free_reg is memory: borrow SLJIT_R1, spilling it to [SLJIT_SP] */
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
+ EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
+ EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
+
+ mem_reg = SLJIT_R1;
+ free_reg = SLJIT_R1;
+ } else {
+ EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); /* save R0 */
+ EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); /* load temp_reg into R0 */
+
+ if (src_reg == SLJIT_R0) /* operands that lived in R0 are now found in free_reg */
+ src_reg = free_reg;
+ if (mem_reg == SLJIT_R0)
+ mem_reg = free_reg;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
 }
- inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = U8(get_jump_code((sljit_uw)type) - 0x40);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P; /* mode32 is cleared only for SLJIT_MOV and SLJIT_MOV_P */
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ /* Lock prefix. */
+ FAIL_IF(emit_byte(compiler, GROUP_LOCK));
+
+ pref = 0;
+ if (op == SLJIT_MOV_U16) /* 16-bit variant: EX86_HALF_ARG with EX86_PREF_66 */
+ pref = EX86_HALF_ARG | EX86_PREF_66;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (op == SLJIT_MOV_U8) /* NOTE(review): EX86_REX presumably makes the low byte of every register addressable - confirm */
+ pref = EX86_REX;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
+
+ if (temp_reg != SLJIT_R0) { /* undo the register shuffle done before the CMPXCHG */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 0;
+ return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0); /* free_reg is always TMP_REG1 in this configuration */
+#else /* !SLJIT_CONFIG_X86_64 */
+ EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0); /* restore the saved R0 */
+ if (free_reg != TMP_REG1)
+ return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0); /* restore free_reg itself: R1 from its stack spill, otherwise from TMP_REG1 */
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
 return SLJIT_SUCCESS;
 }
@@ -3339,8 +4730,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
inst = (sljit_u8*)ensure_buf(compiler, 2);
PTR_FAIL_IF(!inst);
- *inst++ = 0;
- *inst++ = 2;
+ inst[0] = 0;
+ inst[1] = 2;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (dst & SLJIT_MEM)
@@ -3393,8 +4784,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj
inst = (sljit_u8*)ensure_buf(compiler, 2);
PTR_FAIL_IF(!inst);
- *inst++ = 0;
- *inst++ = 3;
+ inst[0] = 0;
+ inst[1] = 3;
return put_label;
}