summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorDavid Li <57157229+david-xl@users.noreply.github.com>2023-12-21 10:18:57 -0800
committerGitHub <noreply@github.com>2023-12-21 10:18:57 -0800
commitf44079db22036d0ade2cf3d2e5a24bde5d378efd (patch)
tree43c03e0f9072bb208d033dcb7ae0a8c35554d97e
parent6a870cca70e3df6070bdcd2768d6569daae8e1ba (diff)
[ISel] Add pattern matching for depositing subreg value (#75978)
Depositing a value into the lowest byte/word of a register is a common code pattern. This patch improves code generation for it by avoiding redundant AND and OR operations.
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td17
-rw-r--r--llvm/test/CodeGen/X86/insert.ll93
2 files changed, 110 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 457833f8cc33..c77c77ee4a3e 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128),
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;
+// Depositing value to 8/16 bit subreg:
+def : Pat<(or (and GR64:$dst, -256),
+ (i64 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR32:$dst, -256),
+ (i32 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR64:$dst, -65536),
+ (i64 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(or (and GR32:$dst, -65536),
+ (i32 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
diff --git a/llvm/test/CodeGen/X86/insert.ll b/llvm/test/CodeGen/X86/insert.ll
new file mode 100644
index 000000000000..381de2ecaa16
--- /dev/null
+++ b/llvm/test/CodeGen/X86/insert.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+define i64 @sub8(i64 noundef %res, ptr %byte) {
+; X86-LABEL: sub8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movb (%ecx), %al
+; X86-NEXT: retl
+;
+; X64-LABEL: sub8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movb (%rsi), %al
+; X64-NEXT: retq
+entry:
+ %and = and i64 %res, -256
+ %d = load i8, ptr %byte, align 1
+ %conv2 = zext i8 %d to i64
+ %or = or i64 %and, %conv2
+ ret i64 %or
+}
+
+define i64 @sub16(i64 noundef %res, ptr %byte) {
+; X86-LABEL: sub16:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $16, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sub16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movw (%rsi), %ax
+; X64-NEXT: retq
+entry:
+ %and = and i64 %res, -65536
+ %d = load i16, ptr %byte, align 1
+ %conv2 = zext i16 %d to i64
+ %or = or i64 %and, %conv2
+ ret i64 %or
+}
+
+define i32 @sub8_32(i32 noundef %res, ptr %byte) {
+; X86-LABEL: sub8_32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movb (%ecx), %al
+; X86-NEXT: retl
+;
+; X64-LABEL: sub8_32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movb (%rsi), %al
+; X64-NEXT: retq
+entry:
+ %and = and i32 %res, -256
+ %d = load i8, ptr %byte, align 1
+ %conv2 = zext i8 %d to i32
+ %or = or i32 %and, %conv2
+ ret i32 %or
+}
+
+define i32 @sub16_32(i32 noundef %res, ptr %byte) {
+; X86-LABEL: sub16_32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $16, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sub16_32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movw (%rsi), %ax
+; X64-NEXT: retq
+entry:
+ %and = and i32 %res, -65536
+ %d = load i16, ptr %byte, align 1
+ %conv2 = zext i16 %d to i32
+ %or = or i32 %and, %conv2
+ ret i32 %or
+}