gcc-patches@gcc.gnu.org
[Top] [All Lists]

[RFA/RFC] PR24230, problems when casting AltiVec constants

Subject: [RFA/RFC] PR24230, problems when casting AltiVec constants
From: Paolo Bonzini
Date: Tue, 01 Nov 2005 23:09:50 +0100
This patch fixes PR24230 by making the PowerPC back-end able to recognize more constants as "easy". Basically, a vspltis[bhw] instruction can now be used for all of V16QImode, V8HImode, V4SImode.

Previously, only "smaller" modes were tried: V8HImode constants could be made with vspltisb, but V16QImode constants could not be made with vspltish. In the PR, the vspltish was created with an AltiVec builtin, and after a cast to V16QImode CSE created a REG_EQUAL note:

(insn:HI 26 64 56 2 (set (mem/v/c/i:V16QI (reg:SI 9 9) [0 tmp+0 S16 A128])
       (reg:V16QI 77 0)) 467 {altivec_stvx_v16qi} (insn_list:REG_DEP_TRUE 22
(nil))
   (expr_list:REG_EQUAL (const_vector:V16QI [
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
               (const_int 0 [0x0]) (const_int 5 [0x5])
           ])
       (nil)))


In the end, this caused a reload failure when trying to remat the constant. With this patch, the back-end is able to remat the constant with a vspltish.

The patch was not tested very much (will do so in the next few days). I include two testcases: one tests code correctness for the newly recognized constants, and the other tests reloading constants for all combinations of vspltis[bhw] and mode.

Ok if bootstrapping/regtesting passes?

Paolo

2005-11-01  Paolo Bonzini  <bonzini@xxxxxxx>

        PR target/24230

        * config/rs6000/rs6000.c (easy_vector_splat_const, easy_vector_same,
        gen_easy_vector_constant_add_self): Delete.
        (vspltis_constant, easy_altivec_constant, gen_easy_altivec_constant):
        New.
        (output_vec_const_move): Use gen_easy_altivec_constant.
        (rs6000_expand_vector_init): Do not emit a set of a VEC_DUPLICATE.
        * config/rs6000/predicates.md (easy_vector_constant): Reorganize tests.
        (easy_vector_constant_add_self): Rewritten.
        * config/rs6000/rs6000-protos.h (easy_vector_splat_const,
        easy_vector_same, gen_easy_vector_constant_add_self): Remove prototype.
        (easy_altivec_constant, gen_easy_altivec_constant): Add prototype.
        * config/rs6000/altivec.md (easy_vector_constant_add_self splitters):
        Macroize and adjust for the other changes.

--- gcc-old/gcc/config/rs6000/rs6000-protos.h   2005-09-28 09:40:36.000000000 
+0200
+++ gcc/gcc/config/rs6000/rs6000-protos.h       2005-11-01 21:38:39.000000000 
+0100
@@ -32,8 +32,7 @@
 extern void rs6000_va_start (tree, rtx);
 #endif /* TREE_CODE */
 
-extern int easy_vector_same (rtx, enum machine_mode);
-extern int easy_vector_splat_const (int, enum machine_mode);
+extern bool easy_altivec_constant (rtx, enum machine_mode);
 extern bool macho_lo_sum_memory_operand (rtx, enum machine_mode);
 extern int num_insns_constant (rtx, enum machine_mode);
 extern int num_insns_constant_wide (HOST_WIDE_INT);
@@ -48,7 +47,7 @@
 
 extern rtx rs6000_got_register (rtx);
 extern rtx find_addr_reg (rtx);
-extern rtx gen_easy_vector_constant_add_self (rtx);
+extern rtx gen_easy_altivec_constant (rtx);
 extern const char *output_vec_const_move (rtx *);
 extern void rs6000_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
--- gcc-old/gcc/config/rs6000/rs6000.c  2005-09-28 09:40:37.000000000 +0200
+++ gcc/gcc/config/rs6000/rs6000.c      2005-11-01 21:24:55.000000000 +0100
@@ -2038,73 +2038,150 @@
     }
 }
 
-/* Returns the constant for the splat instruction, if exists.  */
 
-int
-easy_vector_splat_const (int cst, enum machine_mode mode)
+/* Return true if OP can be synthesized with a particular vspltisb, vspltish 
+   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
+   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
+   all items are set to the same value and contain COPIES replicas of the
+   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
+   operand and the others are set to the value of the operand's msb.  */
+
+static bool
+vspltis_constant (rtx op, unsigned step, unsigned copies)
 {
-  switch (mode)
-    {
-    case V4SImode:
-      if (EASY_VECTOR_15 (cst)
-         || EASY_VECTOR_15_ADD_SELF (cst))
-       return cst;
-      if ((cst & 0xffff) != ((cst >> 16) & 0xffff))
-       break;
-      cst = cst >> 16;
-      /* Fall thru */
+  enum machine_mode mode = GET_MODE (op);
+  enum machine_mode inner = GET_MODE_INNER (mode);
 
-    case V8HImode:
-      if (EASY_VECTOR_15 (cst)
-         || EASY_VECTOR_15_ADD_SELF (cst))
-       return cst;
-      if ((cst & 0xff) != ((cst >> 8) & 0xff))
-       break;
-      cst = cst >> 8;
-      /* Fall thru */
+  unsigned i;
+  unsigned nunits = GET_MODE_NUNITS (mode);
+  unsigned bitsize = GET_MODE_BITSIZE (inner);
+  unsigned mask = GET_MODE_MASK (inner);
+
+  rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+  HOST_WIDE_INT val = INTVAL (last);
+  HOST_WIDE_INT splat_val = val;
+  HOST_WIDE_INT msb_val = val > 0 ? 0 : -1;
+
+  /* Construct the value to be splatted, if possible.  If not, return 0.  */
+  for (i = 2; i <= copies; i *= 2)
+    {
+      HOST_WIDE_INT small_val;
+      bitsize /= 2;
+      small_val = splat_val >> bitsize;
+      mask >>= bitsize;
+      if (splat_val != ((small_val << bitsize) | (small_val & mask)))
+       return false;
+      splat_val = small_val;
+    }
 
-    case V16QImode:
-      if (EASY_VECTOR_15 (cst)
-         || EASY_VECTOR_15_ADD_SELF (cst))
-       return cst;
-    default:
-      break;
+  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
+  if (EASY_VECTOR_15 (splat_val))
+    ;
+
+  /* Also check if we can splat and then add the result to itself.  Do so if
+     the value is positive, or if the splat instruction is using OP's mode;
+     for splat_val < 0, the splat and the add should use the same mode.  */
+  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
+           && (splat_val >= 0 || (step == 1 && copies == 1)))
+    ;
+
+  else
+    return false;
+
+  /* Check if VAL is present in every STEP-th element, and the
+     other elements are filled with its most significant bit.  */
+  for (i = 0; i < nunits - 1; ++i)
+    {
+      HOST_WIDE_INT desired_val;
+      if (((i + 1) & (step - 1)) == 0)
+       desired_val = val;
+      else
+       desired_val = msb_val;
+
+      if (desired_val != INTVAL (CONST_VECTOR_ELT (op, i)))
+       return false;
     }
-  return 0;
+
+  return true;
 }
 
-/* Return nonzero if all elements of a vector have the same value.  */
 
-int
-easy_vector_same (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+/* Return true if OP is of the given MODE and can be synthesized 
+   with a vspltisb, vspltish or vspltisw.  */
+
+bool
+easy_altivec_constant (rtx op, enum machine_mode mode)
 {
-  int units, i, cst;
+  unsigned step, copies;
 
-  units = CONST_VECTOR_NUNITS (op);
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+  else if (mode != GET_MODE (op))
+    return false;
 
-  cst = INTVAL (CONST_VECTOR_ELT (op, 0));
-  for (i = 1; i < units; ++i)
-    if (INTVAL (CONST_VECTOR_ELT (op, i)) != cst)
-      break;
-  if (i == units && easy_vector_splat_const (cst, mode))
-    return 1;
-  return 0;
+  /* Start with a vspltisw.  */
+  step = GET_MODE_NUNITS (mode) / 4;
+  copies = 1;
+
+  if (vspltis_constant (op, step, copies))
+    return true;
+
+  /* Then try with a vspltish.  */
+  if (step == 1)
+    copies <<= 1;
+  else
+    step >>= 1;
+
+  if (vspltis_constant (op, step, copies))
+    return true;
+
+  /* And finally a vspltisb.  */
+  if (step == 1)
+    copies <<= 1;
+  else
+    step >>= 1;
+
+  if (vspltis_constant (op, step, copies))
+    return true;
+
+  return false;
 }
 
-/* Generate easy_vector_constant out of a easy_vector_constant_add_self.  */
+/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
+   result is OP.  Abort if it is not possible.  */
 
 rtx
-gen_easy_vector_constant_add_self (rtx op)
+gen_easy_altivec_constant (rtx op)
 {
-  int i, units;
-  rtvec v;
-  units = GET_MODE_NUNITS (GET_MODE (op));
-  v = rtvec_alloc (units);
-
-  for (i = 0; i < units; i++)
-    RTVEC_ELT (v, i) =
-      GEN_INT (INTVAL (CONST_VECTOR_ELT (op, i)) >> 1);
-  return gen_rtx_raw_CONST_VECTOR (GET_MODE (op), v);
+  enum machine_mode mode = GET_MODE (op);
+  int nunits = GET_MODE_NUNITS (mode);
+  rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+  unsigned step = nunits / 4;
+  unsigned copies = 1;
+
+  /* Start with a vspltisw.  */
+  if (vspltis_constant (op, step, copies))
+    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last));
+
+  /* Then try with a vspltish.  */
+  if (step == 1)
+    copies <<= 1;
+  else
+    step >>= 1;
+
+  if (vspltis_constant (op, step, copies))
+    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last));
+
+  /* And finally a vspltisb.  */
+  if (step == 1)
+    copies <<= 1;
+  else
+    step >>= 1;
+
+  if (vspltis_constant (op, step, copies))
+    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last));
+
+  gcc_unreachable ();
 }
 
 const char *
@@ -2123,44 +2200,26 @@
 
   if (TARGET_ALTIVEC)
     {
+      rtx splat_vec;
       if (zero_constant (vec, mode))
        return "vxor %0,%0,%0";
 
-      gcc_assert (easy_vector_constant (vec, mode));
+      splat_vec = gen_easy_altivec_constant (vec);
+      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
+      operands[1] = XEXP (splat_vec, 0);
+      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
+       return "#";
 
-      operands[1] = GEN_INT (cst);
-      switch (mode)
+      switch (GET_MODE (splat_vec))
        {
        case V4SImode:
-         if (EASY_VECTOR_15 (cst))
-           {
-             operands[1] = GEN_INT (cst);
-             return "vspltisw %0,%1";
-           }
-         else if (EASY_VECTOR_15_ADD_SELF (cst))
-           return "#";
-         cst = cst >> 16;
-         /* Fall thru */
+         return "vspltisw %0,%1";
 
        case V8HImode:
-         if (EASY_VECTOR_15 (cst))
-           {
-             operands[1] = GEN_INT (cst);
-             return "vspltish %0,%1";
-           }
-         else if (EASY_VECTOR_15_ADD_SELF (cst))
-           return "#";
-         cst = cst >> 8;
-         /* Fall thru */
+         return "vspltish %0,%1";
 
        case V16QImode:
-         if (EASY_VECTOR_15 (cst))
-           {
-             operands[1] = GEN_INT (cst);
-             return "vspltisb %0,%1";
-           }
-         else if (EASY_VECTOR_15_ADD_SELF (cst))
-           return "#";
+         return "vspltisb %0,%1";
 
        default:
          gcc_unreachable ();
@@ -2216,11 +2275,10 @@
                                  gen_rtx_XOR (mode, target, target)));
          return;
        }
-      else if (mode != V4SFmode && easy_vector_same (vals, mode))
+      else if (mode != V4SFmode && easy_vector_constant (vals, mode))
        {
          /* Splat immediate.  */
-         x = gen_rtx_VEC_DUPLICATE (mode, CONST_VECTOR_ELT (vals, 0));
-         emit_insn (gen_rtx_SET (VOIDmode, target, x));
+         emit_insn (gen_rtx_SET (VOIDmode, target, vals));
          return;
        }
       else if (all_same)
--- gcc-old/gcc/config/rs6000/predicates.md     2005-09-28 09:40:35.000000000 
+0200
+++ gcc/gcc/config/rs6000/predicates.md 2005-11-01 21:36:09.000000000 +0100
@@ -271,59 +271,55 @@
 (define_predicate "easy_vector_constant"
   (match_code "const_vector")
 {
-  int cst, cst2;
-
-  if (!TARGET_ALTIVEC && !TARGET_SPE)
-    return 0;
-
-  if (zero_constant (op, mode)
-      && ((TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode))
-         || (TARGET_SPE && SPE_VECTOR_MODE (mode))))
-    return 1;
-
-  if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
-    return 0;
-
-  if (TARGET_SPE && mode == V1DImode)
-    return 0;
-
-  cst  = INTVAL (CONST_VECTOR_ELT (op, 0));
-  cst2 = INTVAL (CONST_VECTOR_ELT (op, 1));
-
-  /* Limit SPE vectors to 15 bits signed.  These we can generate with:
-       li r0, CONSTANT1
-       evmergelo r0, r0, r0
-       li r0, CONSTANT2
-
-     I don't know how efficient it would be to allow bigger constants,
-     considering we'll have an extra 'ori' for every 'li'.  I doubt 5
-     instructions is better than a 64-bit memory load, but I don't
-     have the e500 timing specs.  */
-  if (TARGET_SPE && mode == V2SImode
-      && cst  >= -0x7fff && cst <= 0x7fff
-      && cst2 >= -0x7fff && cst2 <= 0x7fff)
-    return 1;
-
-  if (TARGET_ALTIVEC
-      && easy_vector_same (op, mode))
+  if (ALTIVEC_VECTOR_MODE (mode))
     {
-      cst = easy_vector_splat_const (cst, mode);
-      if (EASY_VECTOR_15_ADD_SELF (cst)
-         || EASY_VECTOR_15 (cst))
-       return 1;
+      if (zero_constant (op, mode))
+        return true;
+      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+        return false;
+
+      return easy_altivec_constant (op, mode);
+    }
+
+  if (SPE_VECTOR_MODE (mode))
+    {
+      int cst, cst2;
+      if (zero_constant (op, mode))
+       return true;
+      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+        return false;
+
+      /* Limit SPE vectors to 15 bits signed.  These we can generate with:
+          li r0, CONSTANT1
+          evmergelo r0, r0, r0
+          li r0, CONSTANT2
+
+        I don't know how efficient it would be to allow bigger constants,
+        considering we'll have an extra 'ori' for every 'li'.  I doubt 5
+        instructions is better than a 64-bit memory load, but I don't
+        have the e500 timing specs.  */
+      if (mode == V2SImode)
+       {
+         cst  = INTVAL (CONST_VECTOR_ELT (op, 0));
+         cst2 = INTVAL (CONST_VECTOR_ELT (op, 1));
+         return cst  >= -0x7fff && cst <= 0x7fff
+                && cst2 >= -0x7fff && cst2 <= 0x7fff;
+       }
     }
-  return 0;
+
+  return false;
 })
 
 ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
 (define_predicate "easy_vector_constant_add_self"
   (and (match_code "const_vector")
        (and (match_test "TARGET_ALTIVEC")
-           (and (match_test "easy_vector_same (op, mode)")
-                (match_test "EASY_VECTOR_15_ADD_SELF
-                               (easy_vector_splat_const
-                                 (INTVAL (CONST_VECTOR_ELT (op, 0)),
-                                  mode))")))))
+           (match_test "easy_altivec_constant (op, mode)")))
+{
+  rtx last = CONST_VECTOR_ELT (op, GET_MODE_NUNITS (mode) - 1);
+  HOST_WIDE_INT val = (char) (INTVAL (last) & 255);
+  return EASY_VECTOR_15_ADD_SELF (val);
+})
 
 ;; Return 1 if operand is constant zero (scalars and vectors).
 (define_predicate "zero_constant"
--- gcc-old/gcc/config/rs6000/altivec.md        2005-09-28 09:40:32.000000000 
+0200
+++ gcc/gcc/config/rs6000/altivec.md    2005-11-01 20:51:28.000000000 +0100
@@ -204,18 +204,6 @@
 })
 
 (define_split
-  [(set (match_operand:V4SI 0 "altivec_register_operand" "")
-       (match_operand:V4SI 1 "easy_vector_constant_add_self" ""))]
-  "TARGET_ALTIVEC && reload_completed"
-  [(set (match_dup 0) (match_dup 3))
-   (set (match_dup 0)
-       (plus:V4SI (match_dup 0)
-                  (match_dup 0)))]
-{ 
-  operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})    
-
-(define_split
   [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
         (match_operand:V8HI 1 "input_operand" ""))]
   "TARGET_ALTIVEC && reload_completed
@@ -224,18 +212,6 @@
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
 (define_split
-  [(set (match_operand:V8HI 0 "altivec_register_operand" "")
-       (match_operand:V8HI 1 "easy_vector_constant_add_self" ""))]
-  "TARGET_ALTIVEC && reload_completed"
-  [(set (match_dup 0) (match_dup 3))
-   (set (match_dup 0)
-       (plus:V8HI (match_dup 0)
-                  (match_dup 0)))]
-{
-  operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})
-
-(define_split
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
         (match_operand:V16QI 1 "input_operand" ""))]
   "TARGET_ALTIVEC && reload_completed
@@ -244,18 +220,6 @@
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
 (define_split
-  [(set (match_operand:V16QI 0 "altivec_register_operand" "")
-       (match_operand:V16QI 1 "easy_vector_constant_add_self" ""))]
-  "TARGET_ALTIVEC && reload_completed"
-  [(set (match_dup 0) (match_dup 3))
-   (set (match_dup 0)
-       (plus:V16QI (match_dup 0)
-                  (match_dup 0)))]
-{
-  operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})
-
-(define_split
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
         (match_operand:V4SF 1 "input_operand" ""))]
   "TARGET_ALTIVEC && reload_completed
@@ -265,6 +229,29 @@
   rs6000_split_multireg_move (operands[0], operands[1]); DONE;
 })
 
+(define_split
+  [(set (match_operand:VI 0 "altivec_register_operand" "")
+       (match_operand:VI 1 "easy_vector_constant_add_self" ""))]
+  "TARGET_ALTIVEC && reload_completed"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (plus:VI (match_dup 0)
+                              (match_dup 0)))]
+{
+  rtx dup = gen_easy_altivec_constant (operands[1]);
+  rtx const_vec;
+
+  /* Divide the operand of the resulting VEC_DUPLICATE, and use
+     simplify_rtx to make a CONST_VECTOR.  */
+  XEXP (dup, 0) = simplify_const_binary_operation (ASHIFTRT, QImode,
+                                                  XEXP (dup, 0), const1_rtx);
+  const_vec = simplify_rtx (dup);
+
+  if (GET_MODE (const_vec) == <MODE>mode)
+    operands[3] = const_vec;
+  else
+    operands[3] = gen_lowpart (<MODE>mode, const_vec);
+})
+
 (define_insn "get_vrsave_internal"
   [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))]
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-options "-maltivec -mabi=altivec -O2" } */

/* Check that "easy" AltiVec constants are correctly synthesized.  */

#include <string.h>
#include "altivec_check.h"

extern void abort (void);

/* 16-byte GCC vector types used by the tests: unsigned char, unsigned short
   and unsigned int elements respectively (16, 8 and 4 elements assuming
   2-byte short and 4-byte int).  */
typedef __attribute__ ((vector_size (16))) unsigned char v16qi;
typedef __attribute__ ((vector_size (16))) unsigned short v8hi;
typedef __attribute__ ((vector_size (16))) unsigned int v4si;

/* Scratch buffer, 16-byte aligned, that receives the expected byte image
   written by the vspltis? emulation functions and is then compared against
   the real vector by the check_* functions.  */
char w[16] __attribute__((aligned(16)));
 

/* Emulate the vspltis? instructions on a 16-byte array of chars.  */

/* Reference model of vspltisb: store VAL into each of the 16 bytes
   starting at V.  */
void vspltisb (char *v, char val)
{
  char *cursor = v;
  char *const limit = v + 16;

  while (cursor != limit)
    *cursor++ = val;
}

/* Reference model of vspltish: write eight 2-byte elements at V, each
   holding VAL sign-extended to 16 bits, most significant byte first.

   The sign byte is derived from bit 7 of VAL rather than from `val >> 7':
   plain char may be unsigned (it is on PowerPC Linux and AIX), in which
   case the shift would yield 1 instead of 0xFF for a negative immediate
   and the reference image would be wrong.  */
void vspltish (char *v, char val)
{
  const char sign_fill = (val & 0x80) ? (char) -1 : 0;
  int i;

  for (i = 0; i < 16; i += 2)
    {
      v[i] = sign_fill;
      v[i + 1] = val;
    }
}

/* Reference model of vspltisw: write four 4-byte elements at V, each
   holding VAL sign-extended to 32 bits, most significant byte first.

   The three sign bytes are derived from bit 7 of VAL rather than from
   `val >> 7': plain char may be unsigned (it is on PowerPC Linux and AIX),
   in which case the shift would yield 1 instead of 0xFF for a negative
   immediate and the reference image would be wrong.  */
void vspltisw (char *v, char val)
{
  const char sign_fill = (val & 0x80) ? (char) -1 : 0;
  int i;

  for (i = 0; i < 16; i += 4)
    {
      v[i] = sign_fill;
      v[i + 1] = sign_fill;
      v[i + 2] = sign_fill;
      v[i + 3] = val;
    }
}


/* Use three different check functions for each mode-instruction pair.
   The callers have no typecasting and no addressable vectors, to make
   the test more robust.  */

/* Abort unless the bytes of the V16QI vector V1 equal the reference image
   at V2.  V1 is received by value and the function is never inlined, so
   the caller only has to materialize the constant.  The comparison length
   is taken from the vector type itself (16 bytes) instead of repeating the
   literal.  */
void __attribute__ ((noinline)) check_v16qi (v16qi v1, char *v2)
{
  if (memcmp (&v1, v2, sizeof v1) != 0)
    abort ();
}

/* Abort unless the bytes of the V8HI vector V1 equal the reference image
   at V2.  V1 is received by value and the function is never inlined, so
   the caller only has to materialize the constant.  The comparison length
   is taken from the vector type itself (16 bytes) instead of repeating the
   literal.  */
void __attribute__ ((noinline)) check_v8hi (v8hi v1, char *v2)
{
  if (memcmp (&v1, v2, sizeof v1) != 0)
    abort ();
}

/* Abort unless the bytes of the V4SI vector V1 equal the reference image
   at V2.  V1 is received by value and the function is never inlined, so
   the caller only has to materialize the constant.  The comparison length
   is taken from the vector type itself (16 bytes) instead of repeating the
   literal.  */
void __attribute__ ((noinline)) check_v4si (v4si v1, char *v2)
{
  if (memcmp (&v1, v2, sizeof v1) != 0)
    abort ();
}


/* V16QI tests.  */

/* V16QI constant with 15 in every byte; compared with the image of
   vspltisb 15.  */
void v16qi_vspltisb ()
{
  const v16qi expected = { 15, 15, 15, 15, 15, 15, 15, 15,
                           15, 15, 15, 15, 15, 15, 15, 15 };

  vspltisb (w, 15);
  check_v16qi (expected, w);
}

/* V16QI constant with -5 in every byte; compared with the image of
   vspltisb -5.  */
void v16qi_vspltisb_neg ()
{
  const v16qi expected = { -5, -5, -5, -5, -5, -5, -5, -5,
                           -5, -5, -5, -5, -5, -5, -5, -5 };

  vspltisb (w, -5);
  check_v16qi (expected, w);
}

/* V16QI constant with 30 in every byte (the "add self" case); compared
   with the byte image produced by the vspltisb emulation for 30.  */
void v16qi_vspltisb_addself ()
{
  const v16qi expected = { 30, 30, 30, 30, 30, 30, 30, 30,
                           30, 30, 30, 30, 30, 30, 30, 30 };

  vspltisb (w, 30);
  check_v16qi (expected, w);
}

/* V16QI constant with -24 in every byte (negative "add self" case);
   compared with the byte image produced by the vspltisb emulation
   for -24.  */
void v16qi_vspltisb_neg_addself ()
{
  const v16qi expected = { -24, -24, -24, -24, -24, -24, -24, -24,
                           -24, -24, -24, -24, -24, -24, -24, -24 };

  vspltisb (w, -24);
  check_v16qi (expected, w);
}

/* V16QI constant made of the byte pair (0, 15) repeated eight times;
   compared with the image of vspltish 15.  */
void v16qi_vspltish ()
{
  const v16qi expected = { 0, 15, 0, 15, 0, 15, 0, 15,
                           0, 15, 0, 15, 0, 15, 0, 15 };

  vspltish (w, 15);
  check_v16qi (expected, w);
}

/* V16QI constant made of the byte pair (0, 30) repeated eight times (the
   "add self" case); compared with the vspltish emulation for 30.  */
void v16qi_vspltish_addself ()
{
  const v16qi expected = { 0, 30, 0, 30, 0, 30, 0, 30,
                           0, 30, 0, 30, 0, 30, 0, 30 };

  vspltish (w, 30);
  check_v16qi (expected, w);
}

/* V16QI constant made of the byte pair (-1, -5) repeated eight times;
   compared with the image of vspltish -5.  */
void v16qi_vspltish_neg ()
{
  const v16qi expected = { -1, -5, -1, -5, -1, -5, -1, -5,
                           -1, -5, -1, -5, -1, -5, -1, -5 };

  vspltish (w, -5);
  check_v16qi (expected, w);
}

/* V16QI constant made of the bytes (0, 0, 0, 15) repeated four times;
   compared with the image of vspltisw 15.  */
void v16qi_vspltisw ()
{
  const v16qi expected = { 0, 0, 0, 15, 0, 0, 0, 15,
                           0, 0, 0, 15, 0, 0, 0, 15 };

  vspltisw (w, 15);
  check_v16qi (expected, w);
}

/* V16QI constant made of the bytes (0, 0, 0, 30) repeated four times (the
   "add self" case); compared with the vspltisw emulation for 30.  */
void v16qi_vspltisw_addself ()
{
  const v16qi expected = { 0, 0, 0, 30, 0, 0, 0, 30,
                           0, 0, 0, 30, 0, 0, 0, 30 };

  vspltisw (w, 30);
  check_v16qi (expected, w);
}

/* V16QI constant made of the bytes (-1, -1, -1, -5) repeated four times;
   compared with the image of vspltisw -5.  */
void v16qi_vspltisw_neg ()
{
  const v16qi expected = { -1, -1, -1, -5, -1, -1, -1, -5,
                           -1, -1, -1, -5, -1, -1, -1, -5 };

  vspltisw (w, -5);
  check_v16qi (expected, w);
}


/* V8HI tests. */

/* V8HI constant whose halfwords are all 0x0F0F; compared with the image
   of vspltisb 15.  */
void v8hi_vspltisb ()
{
  const v8hi expected = { 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F,
                          0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F };

  vspltisb (w, 15);
  check_v8hi (expected, w);
}

/* V8HI constant whose halfwords are all 0x1E1E (the "add self" case);
   compared with the vspltisb emulation for 30.  */
void v8hi_vspltisb_addself ()
{
  const v8hi expected = { 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E,
                          0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E };

  vspltisb (w, 30);
  check_v8hi (expected, w);
}

/* V8HI constant whose halfwords are all 0xFBFB; compared with the image
   of vspltisb -5.  */
void v8hi_vspltisb_neg ()
{
  const v8hi expected = { 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB,
                          0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB };

  vspltisb (w, -5);
  check_v8hi (expected, w);
}

/* V8HI constant with 15 in every halfword; compared with the image of
   vspltish 15.  */
void v8hi_vspltish ()
{
  const v8hi expected = { 15, 15, 15, 15,
                          15, 15, 15, 15 };

  vspltish (w, 15);
  check_v8hi (expected, w);
}

/* V8HI constant with -5 in every halfword; compared with the image of
   vspltish -5.  */
void v8hi_vspltish_neg ()
{
  const v8hi expected = { -5, -5, -5, -5,
                          -5, -5, -5, -5 };

  vspltish (w, -5);
  check_v8hi (expected, w);
}

/* V8HI constant with 30 in every halfword (the "add self" case); compared
   with the vspltish emulation for 30.  */
void v8hi_vspltish_addself ()
{
  const v8hi expected = { 30, 30, 30, 30,
                          30, 30, 30, 30 };

  vspltish (w, 30);
  check_v8hi (expected, w);
}

/* V8HI constant with -24 in every halfword (negative "add self" case);
   compared with the vspltish emulation for -24.  */
void v8hi_vspltish_neg_addself ()
{
  const v8hi expected = { -24, -24, -24, -24,
                          -24, -24, -24, -24 };

  vspltish (w, -24);
  check_v8hi (expected, w);
}

/* V8HI constant made of the halfword pair (0, 15) repeated four times;
   compared with the image of vspltisw 15.  */
void v8hi_vspltisw ()
{
  const v8hi expected = { 0, 15, 0, 15,
                          0, 15, 0, 15 };

  vspltisw (w, 15);
  check_v8hi (expected, w);
}

/* V8HI constant made of the halfword pair (0, 30) repeated four times (the
   "add self" case); compared with the vspltisw emulation for 30.  */
void v8hi_vspltisw_addself ()
{
  const v8hi expected = { 0, 30, 0, 30,
                          0, 30, 0, 30 };

  vspltisw (w, 30);
  check_v8hi (expected, w);
}

/* V8HI constant made of the halfword pair (-1, -5) repeated four times;
   compared with the image of vspltisw -5.  */
void v8hi_vspltisw_neg ()
{
  const v8hi expected = { -1, -5, -1, -5,
                          -1, -5, -1, -5 };

  vspltisw (w, -5);
  check_v8hi (expected, w);
}

/* V4SI tests. */

/* V4SI constant whose words are all 0x0F0F0F0F; compared with the image
   of vspltisb 15.  */
void v4si_vspltisb ()
{
  const v4si expected = { 0x0F0F0F0F, 0x0F0F0F0F,
                          0x0F0F0F0F, 0x0F0F0F0F };

  vspltisb (w, 15);
  check_v4si (expected, w);
}

/* V4SI constant whose words are all 0x1E1E1E1E (the "add self" case);
   compared with the vspltisb emulation for 30.  */
void v4si_vspltisb_addself ()
{
  const v4si expected = { 0x1E1E1E1E, 0x1E1E1E1E,
                          0x1E1E1E1E, 0x1E1E1E1E };

  vspltisb (w, 30);
  check_v4si (expected, w);
}

/* V4SI constant whose words are all 0xFBFBFBFB; compared with the image
   of vspltisb -5.  */
void v4si_vspltisb_neg ()
{
  const v4si expected = { 0xFBFBFBFB, 0xFBFBFBFB,
                          0xFBFBFBFB, 0xFBFBFBFB };

  vspltisb (w, -5);
  check_v4si (expected, w);
}

/* V4SI constant whose words are all 0x000F000F; compared with the image
   of vspltish 15.  */
void v4si_vspltish ()
{
  const v4si expected = { 0x000F000F, 0x000F000F,
                          0x000F000F, 0x000F000F };

  vspltish (w, 15);
  check_v4si (expected, w);
}

/* V4SI constant whose words are all 0x001E001E (the "add self" case);
   compared with the vspltish emulation for 30.  */
void v4si_vspltish_addself ()
{
  const v4si expected = { 0x001E001E, 0x001E001E,
                          0x001E001E, 0x001E001E };

  vspltish (w, 30);
  check_v4si (expected, w);
}

/* V4SI constant whose words are all 0xFFFBFFFB; compared with the image
   of vspltish -5.  */
void v4si_vspltish_neg ()
{
  const v4si expected = { 0xFFFBFFFB, 0xFFFBFFFB,
                          0xFFFBFFFB, 0xFFFBFFFB };

  vspltish (w, -5);
  check_v4si (expected, w);
}

/* V4SI constant with 15 in every word; compared with the image of
   vspltisw 15.  */
void v4si_vspltisw ()
{
  const v4si expected = { 15, 15,
                          15, 15 };

  vspltisw (w, 15);
  check_v4si (expected, w);
}

/* V4SI constant with -5 in every word; compared with the image of
   vspltisw -5.  */
void v4si_vspltisw_neg ()
{
  const v4si expected = { -5, -5,
                          -5, -5 };

  vspltisw (w, -5);
  check_v4si (expected, w);
}

/* V4SI constant with 30 in every word (the "add self" case); compared
   with the vspltisw emulation for 30.  */
void v4si_vspltisw_addself ()
{
  const v4si expected = { 30, 30,
                          30, 30 };

  vspltisw (w, 30);
  check_v4si (expected, w);
}

/* V4SI constant with -24 in every word (negative "add self" case);
   compared with the vspltisw emulation for -24.  */
void v4si_vspltisw_neg_addself ()
{
  const v4si expected = { -24, -24,
                          -24, -24 };

  vspltisw (w, -24);
  check_v4si (expected, w);
}



int main ()
{
  altivec_check ();   /* Exit if hardware doesn't support AltiVec.  */

  v16qi_vspltisb ();
  v16qi_vspltisb_neg ();
  v16qi_vspltisb_addself ();
  v16qi_vspltisb_neg_addself ();
  v16qi_vspltish ();
  v16qi_vspltish_addself ();
  v16qi_vspltish_neg ();
  v16qi_vspltisw ();
  v16qi_vspltisw_addself ();
  v16qi_vspltisw_neg ();

  v8hi_vspltisb ();
  v8hi_vspltisb_addself ();
  v8hi_vspltisb_neg ();
  v8hi_vspltish ();
  v8hi_vspltish_neg ();
  v8hi_vspltish_addself ();
  v8hi_vspltish_neg_addself ();
  v8hi_vspltisw ();
  v8hi_vspltisw_addself ();
  v8hi_vspltisw_neg ();

  v4si_vspltisb ();
  v4si_vspltisb_addself ();
  v4si_vspltisb_neg ();
  v4si_vspltish ();
  v4si_vspltish_addself ();
  v4si_vspltish_neg ();
  v4si_vspltisw ();
  v4si_vspltisw_neg ();
  v4si_vspltisw_addself ();
  v4si_vspltisw_neg_addself ();
  return 0;
}
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-maltivec -mabi=altivec -O2" } */

/* Testcase by Richard Guenther and Steven Bosscher.
   Check that "easy" AltiVec constants are correctly synthesized
   if they need to be reloaded.  */

/* 16-byte GCC vector types with unsigned char, unsigned short and unsigned
   int elements; used both as the stored type and as the intermediate cast
   type in the TEST macro below.  */
typedef __attribute__ ((vector_size (16))) unsigned char v16qi;
typedef __attribute__ ((vector_size (16))) unsigned short v8hi;
typedef __attribute__ ((vector_size (16))) unsigned int v4si;

/* Clobber list for the empty asm in TEST: the 32 register names "77"
   through "108".  NOTE(review): presumably these are GCC's hard register
   numbers for the AltiVec vector registers (the RTL dump in the PR shows
   reg 77 holding a V16QI value), so that no vector constant can stay live
   across the asm -- confirm against the rs6000 register numbering.  */
#define REGLIST                                                         \
  "77",  "78",  "79",  "80",  "81",  "82",  "83",  "84",  "85",  "86",  \
  "87",  "88",  "89",  "90",  "91",  "92",  "93",  "94",  "95",  "96",  \
  "97",  "98",  "99", "100", "101", "102", "103", "104", "105", "106",  \
 "107", "108"


/* TEST (A, RESULT, B) defines two functions, A_B and A_B_neg, each taking
   an iteration count H.

   A       vector type of the volatile destination `tmp'.
   RESULT  vector type the builtin's value is cast to first.
   B       suffix of the builtin, i.e. __builtin_altivec_B is called.

   Each loop iteration first executes an empty asm that clobbers every
   register in REGLIST, then stores the builtin's result (for the immediate
   5 in A_B, -5 in A_B_neg), cast to RESULT and then to A, into the
   volatile `tmp'.  Per the header of this testcase the point is that the
   constant must be re-synthesized when it needs to be reloaded.  */
#define TEST(a, result, b)                              \
  void a##_##b (int h)                                  \
  {                                                     \
    volatile a tmp;                                     \
    while (h-- > 0)                                     \
      {                                                 \
        asm ("" : : : REGLIST);                         \
        tmp = (a) (result) __builtin_altivec_##b (5);   \
      }                                                 \
  }                                                     \
                                                        \
  void a##_##b##_neg (int h)                            \
  {                                                     \
    volatile a tmp;                                     \
    while (h-- > 0)                                     \
      {                                                 \
        asm ("" : : : REGLIST);                         \
        tmp = (a) (result) __builtin_altivec_##b (-5);  \
      }                                                 \
  }

/* Instantiate TEST for every pairing of destination mode (first argument)
   with splat builtin (third argument); the second argument is the vector
   type matching the builtin's element width.  Each line defines a positive
   and a _neg function.  */

/* Destination type v16qi.  */
TEST(v16qi, v16qi, vspltisb)
TEST(v16qi, v8hi, vspltish)
TEST(v16qi, v4si, vspltisw)
/* Destination type v8hi.  */
TEST(v8hi, v16qi, vspltisb)
TEST(v8hi, v8hi, vspltish)
TEST(v8hi, v4si, vspltisw)
/* Destination type v4si.  */
TEST(v4si, v16qi, vspltisb)
TEST(v4si, v8hi, vspltish)
TEST(v4si, v4si, vspltisw)
<Prev in Thread] Current Thread [Next in Thread>