|
|
This patch fixes PR24230 by making the PowerPC back-end able to
recognize more constants as "easy". Basically, a vspltis[bhw]
instruction can now be used for all of V16QImode, V8HImode, V4SImode.
Previously, only "smaller" modes were tried: V8HImode constants could be
made with vspltisb, but V16QImode constants could not be made with
vspltish. In the PR, the vspltish was created with an AltiVec builtin,
and after a cast to V16QImode, CSE created a REG_EQUAL note:
(insn:HI 26 64 56 2 (set (mem/v/c/i:V16QI (reg:SI 9 9) [0 tmp+0 S16 A128])
(reg:V16QI 77 0)) 467 {altivec_stvx_v16qi} (insn_list:REG_DEP_TRUE 22
(nil))
(expr_list:REG_EQUAL (const_vector:V16QI [
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
(const_int 0 [0x0]) (const_int 5 [0x5])
])
(nil)))
In the end, this caused a reload failure when trying to remat the
constant. With this patch, the back-end is able to remat the constant
with a vspltish.
The patch was not tested very much (will do so in the next few days). I
include two testcases, one tests code correctness for the newly
recognized constants, and the other tests reloading constants for all
combinations of vspltis[bhw] and mode.
Ok if bootstrapping/regtesting passes?
Paolo
2005-11-01 Paolo Bonzini <bonzini@xxxxxxx>
PR target/24230
* config/rs6000/rs6000.c (easy_vector_splat_const, easy_vector_same,
gen_easy_vector_constant_add_self): Delete.
(vspltis_constant, easy_altivec_constant, gen_easy_altivec_constant):
New.
(output_vec_const_move): Use gen_easy_altivec_constant.
(rs6000_expand_vector_init): Do not emit a set of a VEC_DUPLICATE.
* config/rs6000/predicates.md (easy_vector_constant): Reorganize tests.
(easy_vector_constant_add_self): Rewritten.
* config/rs6000/rs6000-protos.h (easy_vector_splat_const,
easy_vector_same, gen_easy_vector_constant_add_self): Remove prototype.
(easy_altivec_constant, gen_easy_altivec_constant): Add prototype.
* config/rs6000/altivec.md (easy_vector_constant_add_self splitters):
Macroize and adjust for the other changes.
--- gcc-old/gcc/config/rs6000/rs6000-protos.h 2005-09-28 09:40:36.000000000
+0200
+++ gcc/gcc/config/rs6000/rs6000-protos.h 2005-11-01 21:38:39.000000000
+0100
@@ -32,8 +32,7 @@
extern void rs6000_va_start (tree, rtx);
#endif /* TREE_CODE */
-extern int easy_vector_same (rtx, enum machine_mode);
-extern int easy_vector_splat_const (int, enum machine_mode);
+extern bool easy_altivec_constant (rtx, enum machine_mode);
extern bool macho_lo_sum_memory_operand (rtx, enum machine_mode);
extern int num_insns_constant (rtx, enum machine_mode);
extern int num_insns_constant_wide (HOST_WIDE_INT);
@@ -48,7 +47,7 @@
extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
-extern rtx gen_easy_vector_constant_add_self (rtx);
+extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
--- gcc-old/gcc/config/rs6000/rs6000.c 2005-09-28 09:40:37.000000000 +0200
+++ gcc/gcc/config/rs6000/rs6000.c 2005-11-01 21:24:55.000000000 +0100
@@ -2038,73 +2038,150 @@
}
}
-/* Returns the constant for the splat instruction, if exists. */
-int
-easy_vector_splat_const (int cst, enum machine_mode mode)
+/* Return true if OP can be synthesized with a particular vspltisb, vspltish
+ or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
+ depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
+ all items are set to the same value and contain COPIES replicas of the
+ vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
+ operand and the others are set to the value of the operand's msb. */
+
+static bool
+vspltis_constant (rtx op, unsigned step, unsigned copies)
{
- switch (mode)
- {
- case V4SImode:
- if (EASY_VECTOR_15 (cst)
- || EASY_VECTOR_15_ADD_SELF (cst))
- return cst;
- if ((cst & 0xffff) != ((cst >> 16) & 0xffff))
- break;
- cst = cst >> 16;
- /* Fall thru */
+ enum machine_mode mode = GET_MODE (op);
+ enum machine_mode inner = GET_MODE_INNER (mode);
- case V8HImode:
- if (EASY_VECTOR_15 (cst)
- || EASY_VECTOR_15_ADD_SELF (cst))
- return cst;
- if ((cst & 0xff) != ((cst >> 8) & 0xff))
- break;
- cst = cst >> 8;
- /* Fall thru */
+ unsigned i;
+ unsigned nunits = GET_MODE_NUNITS (mode);
+ unsigned bitsize = GET_MODE_BITSIZE (inner);
+ unsigned mask = GET_MODE_MASK (inner);
+
+ rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+ HOST_WIDE_INT val = INTVAL (last);
+ HOST_WIDE_INT splat_val = val;
+ HOST_WIDE_INT msb_val = val >= 0 ? 0 : -1; /* The msb of 0 is clear. */
+
+ /* Construct the value to be splatted, if possible. If not, return false. */
+ for (i = 2; i <= copies; i *= 2)
+ {
+ HOST_WIDE_INT small_val;
+ bitsize /= 2;
+ small_val = splat_val >> bitsize;
+ mask >>= bitsize;
+ if (splat_val != ((small_val << bitsize) | (small_val & mask)))
+ return false;
+ splat_val = small_val;
+ }
- case V16QImode:
- if (EASY_VECTOR_15 (cst)
- || EASY_VECTOR_15_ADD_SELF (cst))
- return cst;
- default:
- break;
+ /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
+ if (EASY_VECTOR_15 (splat_val))
+ ;
+
+ /* Also check if we can splat and then add the result to itself. Do so if
+ the value is positive, or if the splat instruction is using OP's mode;
+ for splat_val < 0, the splat and the add should use the same mode. */
+ else if (EASY_VECTOR_15_ADD_SELF (splat_val)
+ && (splat_val >= 0 || (step == 1 && copies == 1)))
+ ;
+
+ else
+ return false;
+
+ /* Check if VAL is present in every STEP-th element, and the
+ other elements are filled with its most significant bit. */
+ for (i = 0; i < nunits - 1; ++i)
+ {
+ HOST_WIDE_INT desired_val;
+ if (((i + 1) & (step - 1)) == 0)
+ desired_val = val;
+ else
+ desired_val = msb_val;
+
+ if (desired_val != INTVAL (CONST_VECTOR_ELT (op, i)))
+ return false;
}
- return 0;
+
+ return true;
}
-/* Return nonzero if all elements of a vector have the same value. */
-int
-easy_vector_same (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+/* Return true if OP is of the given MODE and can be synthesized
+ with a vspltisb, vspltish or vspltisw. */
+
+bool
+easy_altivec_constant (rtx op, enum machine_mode mode)
{
- int units, i, cst;
+ unsigned step, copies;
- units = CONST_VECTOR_NUNITS (op);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+ else if (mode != GET_MODE (op))
+ return false;
- cst = INTVAL (CONST_VECTOR_ELT (op, 0));
- for (i = 1; i < units; ++i)
- if (INTVAL (CONST_VECTOR_ELT (op, i)) != cst)
- break;
- if (i == units && easy_vector_splat_const (cst, mode))
- return 1;
- return 0;
+ /* Start with a vspltisw. */
+ step = GET_MODE_NUNITS (mode) / 4;
+ copies = 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ /* Then try with a vspltish. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ /* And finally a vspltisb. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ return false;
}
-/* Generate easy_vector_constant out of a easy_vector_constant_add_self. */
+/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
+ result is OP. Abort if it is not possible. */
rtx
-gen_easy_vector_constant_add_self (rtx op)
+gen_easy_altivec_constant (rtx op)
{
- int i, units;
- rtvec v;
- units = GET_MODE_NUNITS (GET_MODE (op));
- v = rtvec_alloc (units);
-
- for (i = 0; i < units; i++)
- RTVEC_ELT (v, i) =
- GEN_INT (INTVAL (CONST_VECTOR_ELT (op, i)) >> 1);
- return gen_rtx_raw_CONST_VECTOR (GET_MODE (op), v);
+ enum machine_mode mode = GET_MODE (op);
+ int nunits = GET_MODE_NUNITS (mode);
+ rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+ unsigned step = nunits / 4;
+ unsigned copies = 1;
+
+ /* Start with a vspltisw. */
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last));
+
+ /* Then try with a vspltish. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last));
+
+ /* And finally a vspltisb. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last));
+
+ gcc_unreachable ();
}
const char *
@@ -2123,44 +2200,26 @@
if (TARGET_ALTIVEC)
{
+ rtx splat_vec;
if (zero_constant (vec, mode))
return "vxor %0,%0,%0";
- gcc_assert (easy_vector_constant (vec, mode));
+ splat_vec = gen_easy_altivec_constant (vec);
+ gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
+ operands[1] = XEXP (splat_vec, 0);
+ if (!EASY_VECTOR_15 (INTVAL (operands[1])))
+ return "#";
- operands[1] = GEN_INT (cst);
- switch (mode)
+ switch (GET_MODE (splat_vec))
{
case V4SImode:
- if (EASY_VECTOR_15 (cst))
- {
- operands[1] = GEN_INT (cst);
- return "vspltisw %0,%1";
- }
- else if (EASY_VECTOR_15_ADD_SELF (cst))
- return "#";
- cst = cst >> 16;
- /* Fall thru */
+ return "vspltisw %0,%1";
case V8HImode:
- if (EASY_VECTOR_15 (cst))
- {
- operands[1] = GEN_INT (cst);
- return "vspltish %0,%1";
- }
- else if (EASY_VECTOR_15_ADD_SELF (cst))
- return "#";
- cst = cst >> 8;
- /* Fall thru */
+ return "vspltish %0,%1";
case V16QImode:
- if (EASY_VECTOR_15 (cst))
- {
- operands[1] = GEN_INT (cst);
- return "vspltisb %0,%1";
- }
- else if (EASY_VECTOR_15_ADD_SELF (cst))
- return "#";
+ return "vspltisb %0,%1";
default:
gcc_unreachable ();
@@ -2216,11 +2275,10 @@
gen_rtx_XOR (mode, target, target)));
return;
}
- else if (mode != V4SFmode && easy_vector_same (vals, mode))
+ else if (mode != V4SFmode && easy_vector_constant (vals, mode))
{
/* Splat immediate. */
- x = gen_rtx_VEC_DUPLICATE (mode, CONST_VECTOR_ELT (vals, 0));
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ emit_insn (gen_rtx_SET (VOIDmode, target, vals));
return;
}
else if (all_same)
--- gcc-old/gcc/config/rs6000/predicates.md 2005-09-28 09:40:35.000000000
+0200
+++ gcc/gcc/config/rs6000/predicates.md 2005-11-01 21:36:09.000000000 +0100
@@ -271,59 +271,55 @@
(define_predicate "easy_vector_constant"
(match_code "const_vector")
{
- int cst, cst2;
-
- if (!TARGET_ALTIVEC && !TARGET_SPE)
- return 0;
-
- if (zero_constant (op, mode)
- && ((TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode))
- || (TARGET_SPE && SPE_VECTOR_MODE (mode))))
- return 1;
-
- if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
- return 0;
-
- if (TARGET_SPE && mode == V1DImode)
- return 0;
-
- cst = INTVAL (CONST_VECTOR_ELT (op, 0));
- cst2 = INTVAL (CONST_VECTOR_ELT (op, 1));
-
- /* Limit SPE vectors to 15 bits signed. These we can generate with:
- li r0, CONSTANT1
- evmergelo r0, r0, r0
- li r0, CONSTANT2
-
- I don't know how efficient it would be to allow bigger constants,
- considering we'll have an extra 'ori' for every 'li'. I doubt 5
- instructions is better than a 64-bit memory load, but I don't
- have the e500 timing specs. */
- if (TARGET_SPE && mode == V2SImode
- && cst >= -0x7fff && cst <= 0x7fff
- && cst2 >= -0x7fff && cst2 <= 0x7fff)
- return 1;
-
- if (TARGET_ALTIVEC
- && easy_vector_same (op, mode))
+ if (ALTIVEC_VECTOR_MODE (mode))
{
- cst = easy_vector_splat_const (cst, mode);
- if (EASY_VECTOR_15_ADD_SELF (cst)
- || EASY_VECTOR_15 (cst))
- return 1;
+ if (zero_constant (op, mode))
+ return true;
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return false;
+
+ return easy_altivec_constant (op, mode);
+ }
+
+ if (SPE_VECTOR_MODE (mode))
+ {
+ int cst, cst2;
+ if (zero_constant (op, mode))
+ return true;
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return false;
+
+ /* Limit SPE vectors to 15 bits signed. These we can generate with:
+ li r0, CONSTANT1
+ evmergelo r0, r0, r0
+ li r0, CONSTANT2
+
+ I don't know how efficient it would be to allow bigger constants,
+ considering we'll have an extra 'ori' for every 'li'. I doubt 5
+ instructions is better than a 64-bit memory load, but I don't
+ have the e500 timing specs. */
+ if (mode == V2SImode)
+ {
+ cst = INTVAL (CONST_VECTOR_ELT (op, 0));
+ cst2 = INTVAL (CONST_VECTOR_ELT (op, 1));
+ return cst >= -0x7fff && cst <= 0x7fff
+ && cst2 >= -0x7fff && cst2 <= 0x7fff;
+ }
}
- return 0;
+
+ return false;
})
;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
(define_predicate "easy_vector_constant_add_self"
(and (match_code "const_vector")
(and (match_test "TARGET_ALTIVEC")
- (and (match_test "easy_vector_same (op, mode)")
- (match_test "EASY_VECTOR_15_ADD_SELF
- (easy_vector_splat_const
- (INTVAL (CONST_VECTOR_ELT (op, 0)),
- mode))")))))
+ (match_test "easy_altivec_constant (op, mode)")))
+{ /* Sign-extend the low byte of the last element, independently of whether the host char type is signed. */
+ rtx last = CONST_VECTOR_ELT (op, GET_MODE_NUNITS (mode) - 1);
+ HOST_WIDE_INT val = ((INTVAL (last) & 0xff) ^ 0x80) - 0x80;
+ return EASY_VECTOR_15_ADD_SELF (val);
+})
;; Return 1 if operand is constant zero (scalars and vectors).
(define_predicate "zero_constant"
--- gcc-old/gcc/config/rs6000/altivec.md 2005-09-28 09:40:32.000000000
+0200
+++ gcc/gcc/config/rs6000/altivec.md 2005-11-01 20:51:28.000000000 +0100
@@ -204,18 +204,6 @@
})
(define_split
- [(set (match_operand:V4SI 0 "altivec_register_operand" "")
- (match_operand:V4SI 1 "easy_vector_constant_add_self" ""))]
- "TARGET_ALTIVEC && reload_completed"
- [(set (match_dup 0) (match_dup 3))
- (set (match_dup 0)
- (plus:V4SI (match_dup 0)
- (match_dup 0)))]
-{
- operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})
-
-(define_split
[(set (match_operand:V8HI 0 "nonimmediate_operand" "")
(match_operand:V8HI 1 "input_operand" ""))]
"TARGET_ALTIVEC && reload_completed
@@ -224,18 +212,6 @@
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
(define_split
- [(set (match_operand:V8HI 0 "altivec_register_operand" "")
- (match_operand:V8HI 1 "easy_vector_constant_add_self" ""))]
- "TARGET_ALTIVEC && reload_completed"
- [(set (match_dup 0) (match_dup 3))
- (set (match_dup 0)
- (plus:V8HI (match_dup 0)
- (match_dup 0)))]
-{
- operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})
-
-(define_split
[(set (match_operand:V16QI 0 "nonimmediate_operand" "")
(match_operand:V16QI 1 "input_operand" ""))]
"TARGET_ALTIVEC && reload_completed
@@ -244,18 +220,6 @@
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
(define_split
- [(set (match_operand:V16QI 0 "altivec_register_operand" "")
- (match_operand:V16QI 1 "easy_vector_constant_add_self" ""))]
- "TARGET_ALTIVEC && reload_completed"
- [(set (match_dup 0) (match_dup 3))
- (set (match_dup 0)
- (plus:V16QI (match_dup 0)
- (match_dup 0)))]
-{
- operands[3] = gen_easy_vector_constant_add_self (operands[1]);
-})
-
-(define_split
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(match_operand:V4SF 1 "input_operand" ""))]
"TARGET_ALTIVEC && reload_completed
@@ -265,6 +229,29 @@
rs6000_split_multireg_move (operands[0], operands[1]); DONE;
})
+(define_split
+ [(set (match_operand:VI 0 "altivec_register_operand" "")
+ (match_operand:VI 1 "easy_vector_constant_add_self" ""))]
+ "TARGET_ALTIVEC && reload_completed"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (plus:VI (match_dup 0)
+ (match_dup 0)))]
+{
+ rtx dup = gen_easy_altivec_constant (operands[1]);
+ rtx const_vec;
+
+ /* Divide the operand of the resulting VEC_DUPLICATE, and use
+ simplify_rtx to make a CONST_VECTOR. */
+ XEXP (dup, 0) = simplify_const_binary_operation (ASHIFTRT, QImode,
+ XEXP (dup, 0), const1_rtx);
+ const_vec = simplify_rtx (dup);
+
+ if (GET_MODE (const_vec) == <MODE>mode)
+ operands[3] = const_vec;
+ else
+ operands[3] = gen_lowpart (<MODE>mode, const_vec);
+})
+
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))]
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-options "-maltivec -mabi=altivec -O2" } */
/* Check that "easy" AltiVec constants are correctly synthesized. */
#include "altivec_check.h"
extern void abort (void);
/* 16-byte vector types built from unsigned char, unsigned short and
unsigned int elements respectively. */
typedef __attribute__ ((vector_size (16))) unsigned char v16qi;
typedef __attribute__ ((vector_size (16))) unsigned short v8hi;
typedef __attribute__ ((vector_size (16))) unsigned int v4si;
/* 16-byte-aligned scratch buffer. Each test fills it with the expected
byte image via one of the emulation routines below and then hands it to
a check function for comparison with the vector constant. */
char w[16] __attribute__((aligned(16)));
/* Emulate the vspltis? instructions on a 16-byte array of chars. */
/* Byte splat: store VAL into each of the 16 bytes starting at V. */
void vspltisb (char *v, char val)
{
  char *const end = v + 16;

  while (v != end)
    *v++ = val;
}
/* Halfword splat: treat the 16 bytes at V as eight 2-byte elements and
   store VAL sign-extended into each one, high byte first.  */
void vspltish (char *v, char val)
{
  /* Whether plain char is signed is implementation-defined.  Where it is
     unsigned, `val >> 7' yields 0 or 1 instead of 0 or -1, so derive the
     fill byte from the sign bit explicitly.  */
  char fill = (val & 0x80) ? (char) -1 : 0;
  int i;

  for (i = 0; i < 16; i += 2)
    {
      v[i] = fill;
      v[i + 1] = val;
    }
}
/* Word splat: treat the 16 bytes at V as four 4-byte elements and store
   VAL sign-extended into each one, high byte first.  */
void vspltisw (char *v, char val)
{
  /* Whether plain char is signed is implementation-defined.  Where it is
     unsigned, `val >> 7' yields 0 or 1 instead of 0 or -1, so derive the
     fill byte from the sign bit explicitly.  */
  char fill = (val & 0x80) ? (char) -1 : 0;
  int i;

  for (i = 0; i < 16; i += 4)
    {
      v[i] = fill;
      v[i + 1] = fill;
      v[i + 2] = fill;
      v[i + 3] = val;
    }
}
/* Use a separate check function for each of the three vector modes, so
that the callers (one per mode-instruction pair) need no typecasts and
no addressable vectors; this makes the test more robust. */
/* Abort unless the 16 bytes of the V16QI vector V1 equal the 16 bytes
   at V2.  __builtin_memcmp is used because no declaration of memcmp is
   visible in this file, and implicit function declarations are not
   valid C99.  */
void __attribute__ ((noinline)) check_v16qi (v16qi v1, char *v2)
{
  if (__builtin_memcmp (&v1, v2, 16))
    abort ();
}
/* Abort unless the 16 bytes of the V8HI vector V1 equal the 16 bytes
   at V2.  __builtin_memcmp is used because no declaration of memcmp is
   visible in this file, and implicit function declarations are not
   valid C99.  */
void __attribute__ ((noinline)) check_v8hi (v8hi v1, char *v2)
{
  if (__builtin_memcmp (&v1, v2, 16))
    abort ();
}
/* Abort unless the 16 bytes of the V4SI vector V1 equal the 16 bytes
   at V2.  __builtin_memcmp is used because no declaration of memcmp is
   visible in this file, and implicit function declarations are not
   valid C99.  */
void __attribute__ ((noinline)) check_v4si (v4si v1, char *v2)
{
  if (__builtin_memcmp (&v1, v2, 16))
    abort ();
}
/* V16QI tests. Each function writes a v16qi constant as a literal, builds
the expected byte image in W with the emulation routine named in the
function's suffix, and compares the two with check_v16qi. */
/* All bytes 15: the image of a byte splat of 15. */
void v16qi_vspltisb ()
{
v16qi v = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };
vspltisb (w, 15);
check_v16qi (v, w);
}
/* All bytes -5: byte splat of a negative value. */
void v16qi_vspltisb_neg ()
{
v16qi v = { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5 };
vspltisb (w, -5);
check_v16qi (v, w);
}
/* All bytes 30. NOTE(review): presumably meant to exercise the patch's
"splat, then add the result to itself" path (30 = 15 + 15) -- confirm
against EASY_VECTOR_15_ADD_SELF. */
void v16qi_vspltisb_addself ()
{
v16qi v = { 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 };
vspltisb (w, 30);
check_v16qi (v, w);
}
/* All bytes -24: the negative counterpart of the add-self case, where the
splat and the constant use the same mode. */
void v16qi_vspltisb_neg_addself ()
{
v16qi v = { -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
-24, -24, -24 };
vspltisb (w, -24);
check_v16qi (v, w);
}
/* Byte pairs { 0, 15 }: the byte image of a halfword splat of 15. */
void v16qi_vspltish ()
{
v16qi v = { 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15 };
vspltish (w, 15);
check_v16qi (v, w);
}
/* Byte pairs { 0, 30 }: halfword image of 30 (add-self case). */
void v16qi_vspltish_addself ()
{
v16qi v = { 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30 };
vspltish (w, 30);
check_v16qi (v, w);
}
/* Byte pairs { -1, -5 }: halfword image of -5, sign-extended. */
void v16qi_vspltish_neg ()
{
v16qi v = { -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5 };
vspltish (w, -5);
check_v16qi (v, w);
}
/* Byte quads { 0, 0, 0, 15 }: the byte image of a word splat of 15. */
void v16qi_vspltisw ()
{
v16qi v = { 0, 0, 0, 15, 0, 0, 0, 15, 0, 0, 0, 15, 0, 0, 0, 15 };
vspltisw (w, 15);
check_v16qi (v, w);
}
/* Byte quads { 0, 0, 0, 30 }: word image of 30 (add-self case). */
void v16qi_vspltisw_addself ()
{
v16qi v = { 0, 0, 0, 30, 0, 0, 0, 30, 0, 0, 0, 30, 0, 0, 0, 30 };
vspltisw (w, 30);
check_v16qi (v, w);
}
/* Byte quads { -1, -1, -1, -5 }: word image of -5, sign-extended. */
void v16qi_vspltisw_neg ()
{
v16qi v = { -1, -1, -1, -5, -1, -1, -1, -5, -1, -1, -1, -5, -1, -1, -1, -5 };
vspltisw (w, -5);
check_v16qi (v, w);
}
/* V8HI tests. Each function writes a v8hi constant as a literal, builds
the expected byte image in W with the emulation routine named in the
function's suffix, and compares the two with check_v8hi. */
/* Halfwords 0x0F0F: both bytes 15, i.e. the image of a byte splat of 15. */
void v8hi_vspltisb ()
{
v8hi v = { 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F };
vspltisb (w, 15);
check_v8hi (v, w);
}
/* Halfwords 0x1E1E: both bytes 30 (add-self case of the byte splat). */
void v8hi_vspltisb_addself ()
{
v8hi v = { 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E };
vspltisb (w, 30);
check_v8hi (v, w);
}
/* Halfwords 0xFBFB: both bytes -5 (0xFB). */
void v8hi_vspltisb_neg ()
{
v8hi v = { 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB };
vspltisb (w, -5);
check_v8hi (v, w);
}
/* All halfwords 15: the splat uses the constant's own element size. */
void v8hi_vspltish ()
{
v8hi v = { 15, 15, 15, 15, 15, 15, 15, 15 };
vspltish (w, 15);
check_v8hi (v, w);
}
/* All halfwords -5. */
void v8hi_vspltish_neg ()
{
v8hi v = { -5, -5, -5, -5, -5, -5, -5, -5 };
vspltish (w, -5);
check_v8hi (v, w);
}
/* All halfwords 30 (add-self case). */
void v8hi_vspltish_addself ()
{
v8hi v = { 30, 30, 30, 30, 30, 30, 30, 30 };
vspltish (w, 30);
check_v8hi (v, w);
}
/* All halfwords -24: negative add-self case, splat and constant in the
same mode. */
void v8hi_vspltish_neg_addself ()
{
v8hi v = { -24, -24, -24, -24, -24, -24, -24, -24 };
vspltish (w, -24);
check_v8hi (v, w);
}
/* Halfword pairs { 0, 15 }: the image of a word splat of 15. */
void v8hi_vspltisw ()
{
v8hi v = { 0, 15, 0, 15, 0, 15, 0, 15 };
vspltisw (w, 15);
check_v8hi (v, w);
}
/* Halfword pairs { 0, 30 }: word image of 30 (add-self case). */
void v8hi_vspltisw_addself ()
{
v8hi v = { 0, 30, 0, 30, 0, 30, 0, 30 };
vspltisw (w, 30);
check_v8hi (v, w);
}
/* Halfword pairs { -1, -5 }: word image of -5, sign-extended. */
void v8hi_vspltisw_neg ()
{
v8hi v = { -1, -5, -1, -5, -1, -5, -1, -5 };
vspltisw (w, -5);
check_v8hi (v, w);
}
/* V4SI tests. Each function writes a v4si constant as a literal, builds
the expected byte image in W with the emulation routine named in the
function's suffix, and compares the two with check_v4si. */
/* Words 0x0F0F0F0F: every byte 15, i.e. the image of a byte splat of 15. */
void v4si_vspltisb ()
{
v4si v = { 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F };
vspltisb (w, 15);
check_v4si (v, w);
}
/* Words 0x1E1E1E1E: every byte 30 (add-self case of the byte splat). */
void v4si_vspltisb_addself ()
{
v4si v = { 0x1E1E1E1E, 0x1E1E1E1E, 0x1E1E1E1E, 0x1E1E1E1E };
vspltisb (w, 30);
check_v4si (v, w);
}
/* Words 0xFBFBFBFB: every byte -5 (0xFB). */
void v4si_vspltisb_neg ()
{
v4si v = { 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB };
vspltisb (w, -5);
check_v4si (v, w);
}
/* Words 0x000F000F: both halfwords 15, the image of a halfword splat. */
void v4si_vspltish ()
{
v4si v = { 0x000F000F, 0x000F000F, 0x000F000F, 0x000F000F };
vspltish (w, 15);
check_v4si (v, w);
}
/* Words 0x001E001E: both halfwords 30 (add-self case). */
void v4si_vspltish_addself ()
{
v4si v = { 0x001E001E, 0x001E001E, 0x001E001E, 0x001E001E };
vspltish (w, 30);
check_v4si (v, w);
}
/* Words 0xFFFBFFFB: both halfwords -5 (0xFFFB). */
void v4si_vspltish_neg ()
{
v4si v = { 0xFFFBFFFB, 0xFFFBFFFB, 0xFFFBFFFB, 0xFFFBFFFB };
vspltish (w, -5);
check_v4si (v, w);
}
/* All words 15: the splat uses the constant's own element size. */
void v4si_vspltisw ()
{
v4si v = { 15, 15, 15, 15 };
vspltisw (w, 15);
check_v4si (v, w);
}
/* All words -5. */
void v4si_vspltisw_neg ()
{
v4si v = { -5, -5, -5, -5 };
vspltisw (w, -5);
check_v4si (v, w);
}
/* All words 30 (add-self case). */
void v4si_vspltisw_addself ()
{
v4si v = { 30, 30, 30, 30 };
vspltisw (w, 30);
check_v4si (v, w);
}
/* All words -24: negative add-self case, splat and constant in the same
mode. */
void v4si_vspltisw_neg_addself ()
{
v4si v = { -24, -24, -24, -24 };
vspltisw (w, -24);
check_v4si (v, w);
}
int main ()
{
altivec_check (); /* Exit if hardware doesn't support AltiVec. */
v16qi_vspltisb ();
v16qi_vspltisb_neg ();
v16qi_vspltisb_addself ();
v16qi_vspltisb_neg_addself ();
v16qi_vspltish ();
v16qi_vspltish_addself ();
v16qi_vspltish_neg ();
v16qi_vspltisw ();
v16qi_vspltisw_addself ();
v16qi_vspltisw_neg ();
v8hi_vspltisb ();
v8hi_vspltisb_addself ();
v8hi_vspltisb_neg ();
v8hi_vspltish ();
v8hi_vspltish_neg ();
v8hi_vspltish_addself ();
v8hi_vspltish_neg_addself ();
v8hi_vspltisw ();
v8hi_vspltisw_addself ();
v8hi_vspltisw_neg ();
v4si_vspltisb ();
v4si_vspltisb_addself ();
v4si_vspltisb_neg ();
v4si_vspltish ();
v4si_vspltish_addself ();
v4si_vspltish_neg ();
v4si_vspltisw ();
v4si_vspltisw_neg ();
v4si_vspltisw_addself ();
v4si_vspltisw_neg_addself ();
return 0;
}
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-maltivec -mabi=altivec -O2" } */
/* Testcase by Richard Guenther and Steven Bosscher.
Check that "easy" AltiVec constants are correctly synthesized
if they need to be reloaded. */
/* 16-byte vector types built from unsigned char, unsigned short and
unsigned int elements respectively. */
typedef __attribute__ ((vector_size (16))) unsigned char v16qi;
typedef __attribute__ ((vector_size (16))) unsigned short v8hi;
typedef __attribute__ ((vector_size (16))) unsigned int v4si;
/* Clobber list for the empty asm in TEST: registers "77" through "108".
NOTE(review): presumably GCC's hard register numbers for the 32 AltiVec
vector registers (the RTL dump in the patch description shows
reg:V16QI 77) -- confirm against the rs6000 register layout. */
#define REGLIST \
"77", "78", "79", "80", "81", "82", "83", "84", "85", "86", \
"87", "88", "89", "90", "91", "92", "93", "94", "95", "96", \
"97", "98", "99", "100", "101", "102", "103", "104", "105", "106", \
"107", "108"
/* TEST (A, RESULT, B) defines two functions, A_B and A_B_neg, each taking
an iteration count H. Every iteration executes an empty asm that
clobbers REGLIST and then stores into a volatile variable of type A the
value of __builtin_altivec_B (5) (respectively -5), cast first to RESULT
and then to A. Clobbering the registers inside the loop is intended to
make the constant need reloading, which is what this compile-only test
exercises. */
#define TEST(a, result, b) \
void a##_##b (int h) \
{ \
volatile a tmp; \
while (h-- > 0) \
{ \
asm ("" : : : REGLIST); \
tmp = (a) (result) __builtin_altivec_##b (5); \
} \
} \
\
void a##_##b##_neg (int h) \
{ \
volatile a tmp; \
while (h-- > 0) \
{ \
asm ("" : : : REGLIST); \
tmp = (a) (result) __builtin_altivec_##b (-5); \
} \
}
/* Instantiate TEST for all nine combinations of destination mode (first
argument) and splat instruction (third argument). The second argument is
the vector type each builtin's result is cast to first. */
TEST(v16qi, v16qi, vspltisb)
TEST(v16qi, v8hi, vspltish)
TEST(v16qi, v4si, vspltisw)
TEST(v8hi, v16qi, vspltisb)
TEST(v8hi, v8hi, vspltish)
TEST(v8hi, v4si, vspltisw)
TEST(v4si, v16qi, vspltisb)
TEST(v4si, v8hi, vspltish)
TEST(v4si, v4si, vspltisw)
|
|