From dean-list-gcc-patches@arctic.org Sun Apr 20 11:53:05 2003
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
Date: Sun, 20 Apr 2003 11:52:36 -0700 (PDT)
From: dean gaudet <dean-list-gcc-patches@arctic.org>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH][x86] movd support for mmx regs
X-comment: visit http://arctic.org/~dean/legal for information regarding
    copyright and disclaimer.

I need the more efficient "movd" instruction to transfer the bottom 32 bits
of MMX registers to and from memory or general-purpose registers.  The
existing code for _mm_cvtsi32_si64 and _mm_cvtsi64_si32 uses casts to
long long, which generates pretty poor code compared to using movd directly.

There was already support for the XMM (SSE2) version of movd, used by the
intrinsics _mm_cvtsi32_si128 and _mm_cvtsi128_si32, so I cloned that with
suitable modifications for MMX registers.

-dean

Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.560
diff -u -r1.560 i386.c
--- gcc/config/i386/i386.c	11 Apr 2003 21:27:13 -0000	1.560
+++ gcc/config/i386/i386.c	20 Apr 2003 18:37:45 -0000
@@ -13330,6 +13330,11 @@
   tree void_ftype_pchar_v16qi
     = build_function_type_list (void_type_node,
 			        pchar_type_node, V16QI_type_node, NULL_TREE);
+  tree v2si_ftype_pcint
+    = build_function_type_list (V2SI_type_node, pcint_type_node, NULL_TREE);
+  tree void_ftype_pcint_v2si
+    = build_function_type_list (void_type_node,
+			        pcint_type_node, V2SI_type_node, NULL_TREE);
   tree v4si_ftype_pcint
     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
   tree void_ftype_pcint_v4si
@@ -13435,6 +13440,9 @@
   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

+  def_builtin (MASK_MMX, "__builtin_ia32_loadd_si64", v2si_ftype_pcint, IX86_BUILTIN_LOADD_SI64);
+  def_builtin (MASK_MMX, "__builtin_ia32_si64_stored", void_ftype_pcint_v2si, IX86_BUILTIN_SI64_STORED);
+
   def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
   def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
@@ -14430,6 +14438,11 @@
       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
     case IX86_BUILTIN_STORED:
       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
+
+    case IX86_BUILTIN_LOADD_SI64:
+      return ix86_expand_unop_builtin (CODE_FOR_mmx_loadd, arglist, target, 1);
+    case IX86_BUILTIN_SI64_STORED:
+      return ix86_expand_store_builtin (CODE_FOR_mmx_stored, arglist);

     default:
       break;
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.333
diff -u -r1.333 i386.h
--- gcc/config/i386/i386.h	17 Apr 2003 23:18:55 -0000	1.333
+++ gcc/config/i386/i386.h	20 Apr 2003 18:37:46 -0000
@@ -2148,6 +2148,8 @@
   IX86_BUILTIN_MOVQ,
   IX86_BUILTIN_LOADD,
   IX86_BUILTIN_STORED,
+  IX86_BUILTIN_LOADD_SI64,
+  IX86_BUILTIN_SI64_STORED,

   IX86_BUILTIN_CLRTI,

Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.455
diff -u -r1.455 i386.md
--- gcc/config/i386/i386.md	15 Apr 2003 13:33:57 -0000	1.455
+++ gcc/config/i386/i386.md	20 Apr 2003 18:37:47 -0000
@@ -18860,6 +18860,28 @@
   operands[2] = CONST0_RTX (V2DFmode);
 })

+(define_insn "mmx_loadd"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_merge:V2SI
+	 (vec_duplicate:V2SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
+	 (const_vector:V2SI [(const_int 0)
+			     (const_int 0)])
+	 (const_int 1)))]
+  "TARGET_MMX"
+  "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxmov")
+   (set_attr "mode" "DI")])
+
+(define_insn "mmx_stored"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=mr")
+	(vec_select:SI
+	 (match_operand:V2SI 1 "register_operand" "y")
+	 (parallel [(const_int 0)])))]
+  "TARGET_MMX"
+  "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxmov")
+   (set_attr "mode" "DI")])
+
 (define_insn "movv8qi_internal"
   [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m")
 	(match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))]
Index: gcc/config/i386/mmintrin.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/mmintrin.h,v
retrieving revision 1.6
diff -u -r1.6 mmintrin.h
--- gcc/config/i386/mmintrin.h	22 Feb 2003 02:09:06 -0000	1.6
+++ gcc/config/i386/mmintrin.h	20 Apr 2003 18:37:47 -0000
@@ -49,11 +49,10 @@
 }

 /* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
-static __inline __m64
-_mm_cvtsi32_si64 (int __i)
+static __inline __m64
+_mm_cvtsi32_si64 (int __A)
 {
-  long long __tmp = (unsigned int)__i;
-  return (__m64) __tmp;
+  return (__m64) __builtin_ia32_loadd_si64 (&__A);
 }

 #ifdef __x86_64__
@@ -76,7 +75,8 @@
 static __inline int
 _mm_cvtsi64_si32 (__m64 __i)
 {
-  long long __tmp = (long long)__i;
+  int __tmp;
+  __builtin_ia32_si64_stored (&__tmp, (__v2si)__i);
   return __tmp;
 }

