gcc-patches@gcc.gnu.org
[Top] [All Lists]

[power7-meissner] Vectorize simple math builtins

Subject: [power7-meissner] Vectorize simple math builtins
From: Michael Meissner
Date: Mon, 13 Jul 2009 17:28:35 -0400
This patch vectorizes some of the simple math builtins (copysign, sqrt, ceil,
floor, trunc, nearbyint, rint, fabs, fmax and fmin) on Altivec and VSX.  It was
mainly written by Revital, with some tweaks by me.

2009-07-10  Revital Eres <ERES@xxxxxxxxxx>
            Michael Meissner  <meissner@xxxxxxxxxxxxxxxxxx>

        * config/rs6000/rs6000.opt (-mvectorize-builtins): Add new debug
        switch.

        * config/rs6000/rs6000.c (rs6000_builtin_vectorized_function):
        Vectorize the functions that we can on Altivec and VSX.
        (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define target
        hook.
        (bdesc_2arg): Add __builtin_vsx_cpsgndp and
        __builtin_vsx_cpsgnsp.

        * config/rs6000/rs6000.h (enum rs6000_builtins): Add copysign
        builtins.

Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt        (revision 149448)
+++ gcc/config/rs6000/rs6000.opt        (working copy)
@@ -151,6 +151,10 @@ malign-branch-targets
 Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1)
 ; Explicitly set/unset whether rs6000_align_branch_targets is set
 
+mvectorize-builtins
+Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1)
+; Explicitly control whether we vectorize the builtins or not.
+
 mupdate
 Target Report Var(TARGET_UPDATE) Init(1)
 Generate load/store with update instructions
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 149448)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -839,6 +839,7 @@ static rtx rs6000_emit_stack_reset (rs60
 static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
                                   enum machine_mode, bool, bool, bool);
 static bool rs6000_reg_live_or_pic_offset_p (int);
+static tree rs6000_builtin_vectorized_function (unsigned int, tree, tree);
 static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int);
 static void rs6000_restore_saved_cr (rtx, int);
 static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT);
@@ -1395,6 +1396,10 @@ static const struct attribute_spec rs600
 #undef TARGET_HANDLE_OPTION
 #define TARGET_HANDLE_OPTION rs6000_handle_option
 
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+  rs6000_builtin_vectorized_function
+
 #undef TARGET_DEFAULT_TARGET_FLAGS
 #define TARGET_DEFAULT_TARGET_FLAGS \
   (TARGET_DEFAULT)
@@ -3013,6 +3018,178 @@ rs6000_parse_fpu_option (const char *opt
   return FPU_NONE;
 }
 
+/* Returns a function decl for a vectorized version of the builtin function
+   with builtin function code FN, result vector type TYPE_OUT and argument
+   vector type TYPE_IN, or NULL_TREE if it is not available.  */
+
+static tree
+rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
+                                   tree type_in)
+{
+  enum machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE
+      || !TARGET_VECTORIZE_BUILTINS)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  switch (fn)
+    {
+    case BUILT_IN_COPYSIGN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
+      break;
+    case BUILT_IN_COPYSIGNF:
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+         && out_mode == SFmode && out_n == 4
+         && in_mode == SFmode && in_n == 4)
+       return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
+      break;
+    case BUILT_IN_SQRT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
+      break;
+    case BUILT_IN_SQRTF:
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+         && out_mode == SFmode && out_n == 4
+         && in_mode == SFmode && in_n == 4)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
+      break;
+    case BUILT_IN_CEIL:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
+      break;
+    case BUILT_IN_CEILF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
+      break;
+    case BUILT_IN_FLOOR:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
+      break;
+    case BUILT_IN_FLOORF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
+      break;
+    case BUILT_IN_TRUNC:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
+      break;
+    case BUILT_IN_TRUNCF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
+      break;
+    case BUILT_IN_NEARBYINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && flag_unsafe_math_optimizations
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
+      break;
+    case BUILT_IN_NEARBYINTF:
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+         && flag_unsafe_math_optimizations
+         && out_mode == SFmode && out_n == 4
+         && in_mode == SFmode && in_n == 4)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
+      break;
+    case BUILT_IN_RINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && !flag_trapping_math
+          && out_mode == DFmode && out_n == 2
+          && in_mode == DFmode && in_n == 2)
+        return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
+      break;
+    case BUILT_IN_RINTF:
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+         && !flag_trapping_math
+          && out_mode == SFmode && out_n == 4
+          && in_mode == SFmode && in_n == 4)
+        return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
+      break;
+    case BUILT_IN_FABS:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVABSDP];
+      break;
+    case BUILT_IN_FABSF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVABSSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_ABS_V4SF];
+      break;
+    case BUILT_IN_FMAX:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVMAXDP];
+      break;
+    case BUILT_IN_FMAXF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVMAXSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMAXFP];
+      break;
+    case BUILT_IN_FMIN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+         && out_mode == DFmode && out_n == 2
+         && in_mode == DFmode && in_n == 2)
+       return rs6000_builtin_decls[VSX_BUILTIN_XVMINDP];
+      break;
+    case BUILT_IN_FMINF:
+      if (out_mode != SFmode || out_n != 4
+         || in_mode != SFmode || in_n != 4)
+       break;
+      if (VECTOR_UNIT_VSX_P (V4SFmode))
+       return rs6000_builtin_decls[VSX_BUILTIN_XVMINSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+       return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMINFP];
+      break;
+    default:
+      ;
+    }
+  return NULL_TREE;
+}
+
+
 /* Implement TARGET_HANDLE_OPTION.  */
 
 static bool
@@ -8521,6 +8698,8 @@ static const struct builtin_description 
   { MASK_VSX, CODE_FOR_smaxdf3, "__builtin_vsx_xsmaxdp", VSX_BUILTIN_XSMAXDP },
  { MASK_VSX, CODE_FOR_vsx_tdivdf3_fe, "__builtin_vsx_xstdivdp_fe", VSX_BUILTIN_XSTDIVDP_FE },
  { MASK_VSX, CODE_FOR_vsx_tdivdf3_fg, "__builtin_vsx_xstdivdp_fg", VSX_BUILTIN_XSTDIVDP_FG },
+  { MASK_VSX, CODE_FOR_vsx_copysignv2df3, "__builtin_vsx_cpsgndp", VSX_BUILTIN_CPSGNDP },
+  { MASK_VSX, CODE_FOR_vsx_copysignv4sf3, "__builtin_vsx_cpsgnsp", VSX_BUILTIN_CPSGNSP },
 
  { MASK_VSX, CODE_FOR_vsx_concat_v2df, "__builtin_vsx_concat_2df", VSX_BUILTIN_CONCAT_2DF },
  { MASK_VSX, CODE_FOR_vsx_concat_v2di, "__builtin_vsx_concat_2di", VSX_BUILTIN_CONCAT_2DI },
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 149448)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -3228,6 +3228,8 @@ enum rs6000_builtins
   VSX_BUILTIN_XSRSQRTEDP,
   VSX_BUILTIN_XSSQRTDP,
   VSX_BUILTIN_XSSUBDP,
+  VSX_BUILTIN_CPSGNDP,
+  VSX_BUILTIN_CPSGNSP,
   VSX_BUILTIN_XSTDIVDP_FE,
   VSX_BUILTIN_XSTDIVDP_FG,
   VSX_BUILTIN_XSTSQRTDP_FE,


-- 
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@xxxxxxxxxxxxxxxxxx

<Prev in Thread] Current Thread [Next in Thread>