gcc-patches@gcc.gnu.org
[Top] [All Lists]

[PATCH] Fix reload failures on inline asm from mplayer SVN (PR rtl-optimization/39543), take 2

Subject: [PATCH] Fix reload failures on inline asm from mplayer SVN PR rtl-optimization/39543, take 2
From: Jakub Jelinek
Date: Fri, 27 Mar 2009 12:25:54 +0100
On Thu, Mar 26, 2009 at 05:52:16PM +0100, Steven Bosscher wrote:
> [ cf. http://gcc.gnu.org/ml/gcc-patches/2009-03/msg01315.html ]
> 
> > +static int
> > +check_reg_count_callback (rtx *px, void *data)
> 
> Misses comment before the function.

Function gone.

> > +{
> > +  int *regnop = (int *) data;
> > +
> > +  if (!REG_P (*px))
> > +    return 0;
> > +
> > +  if (*regnop < 0 || *regnop == (int) REGNO (*px))
> 
> Eh, when can you have *regnop < 0?

As Paolo already said, this was being initialized to -1.

> > +/* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
> > +
> > +static bool
> > +forward_propagate_asm (df_ref use, rtx def_set, rtx reg)
> 
> Where is DEF_INSN?  You mean DEF_SET in the comment, I think.

I'm now passing also def_insn.

> > +  /* In __asm don't replace if src might need more registers than
> > +     reg, as that could increase register pressure on the __asm.  */
> > +  regno = -1;
> > +  if (for_each_rtx (&src, check_reg_count_callback, &regno) > 0)
> > +    return false;
> 
> Can you use DF_INSN_USES of DEF_SET instead here? All for_each_rtx and
> note_stores should die in places where the DF cache can be used :-)

Yep, that works too.  Updated patch, bootstrapped/regtested on x86_64-linux
and i686-linux.  Ok for trunk?

2009-03-27  Jakub Jelinek  <jakub@xxxxxxxxxx>

        PR rtl-optimization/39543
        * fwprop.c (forward_propagate_asm): New function.
        (forward_propagate_and_simplify): Propagate also into __asm, if it
        doesn't increase the number of referenced registers.

        * gcc.target/i386/pr39543-1.c: New test.
        * gcc.target/i386/pr39543-2.c: New test.
        * gcc.target/i386/pr39543-3.c: New test.

--- gcc/fwprop.c.jj     2009-03-27 07:55:33.000000000 +0100
+++ gcc/fwprop.c        2009-03-27 10:00:48.000000000 +0100
@@ -1,5 +1,5 @@
 /* RTL-based forward propagation pass for GNU compiler.
-   Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
    Contributed by Paolo Bonzini and Steven Bosscher.
 
 This file is part of GCC.
@@ -852,6 +852,73 @@ forward_propagate_subreg (df_ref use, rt
     return false;
 }
 
+/* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
+
+static bool
+forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
+{
+  rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
+  int speed_p, i;
+  df_ref *use_vec;
+
+  gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
+
+  src = SET_SRC (def_set);
+  use_pat = PATTERN (use_insn);
+
+  /* In __asm don't replace if src might need more registers than
+     reg, as that could increase register pressure on the __asm.  */
+  use_vec = DF_INSN_USES (def_insn);
+  if (use_vec[0] && use_vec[1])
+    return false;
+
+  speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
+  asm_operands = NULL_RTX;
+  switch (GET_CODE (use_pat))
+    {
+    case ASM_OPERANDS:
+      asm_operands = use_pat;
+      break;
+    case SET:
+      loc = &SET_DEST (use_pat);
+      new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+      if (new_rtx)
+       validate_unshare_change (use_insn, loc, new_rtx, true);
+      asm_operands = SET_SRC (use_pat);
+      break;
+    case PARALLEL:
+      for (i = 0; i < XVECLEN (use_pat, 0); i++)
+       if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
+         {
+           loc = &SET_DEST (XVECEXP (use_pat, 0, i));
+           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+           if (new_rtx)
+             validate_unshare_change (use_insn, loc, new_rtx, true);
+           asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
+         }
+       else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
+         asm_operands = XVECEXP (use_pat, 0, i);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
+  for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
+    {
+      loc = &ASM_OPERANDS_INPUT (asm_operands, i);
+      new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+      if (new_rtx)
+       validate_unshare_change (use_insn, loc, new_rtx, true);
+    }
+
+  if (num_changes_pending () == 0 || !apply_change_group ())
+    return false;
+
+  num_changes++;
+  return true;
+}
+
 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
    result.  */
 
@@ -863,12 +930,16 @@ forward_propagate_and_simplify (df_ref u
   rtx src, reg, new_rtx, *loc;
   bool set_reg_equal;
   enum machine_mode mode;
+  int asm_use = -1;
+
+  if (INSN_CODE (use_insn) < 0)
+    asm_use = asm_noperands (PATTERN (use_insn));
 
-  if (!use_set)
+  if (!use_set && asm_use < 0)
     return false;
 
   /* Do not propagate into PC, CC0, etc.  */
-  if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
+  if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
     return false;
 
   /* If def and use are subreg, check if they match.  */
@@ -900,7 +971,7 @@ forward_propagate_and_simplify (df_ref u
   if (MEM_P (src) && MEM_READONLY_P (src))
     {
       rtx x = avoid_constant_pool_reference (src);
-      if (x != src)
+      if (x != src && use_set)
        {
           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
          rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
@@ -911,6 +982,9 @@ forward_propagate_and_simplify (df_ref u
       return false;
     }
 
+  if (asm_use >= 0)
+    return forward_propagate_asm (use, def_insn, def_set, reg);
+
   /* Else try simplifying.  */
 
   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
--- gcc/testsuite/gcc.target/i386/pr39543-1.c.jj        2009-03-25 16:40:18.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-1.c   2009-03-25 16:40:50.000000000 +0100
@@ -0,0 +1,52 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fomit-frame-pointer" } */
+
+float __attribute__ ((aligned (16))) s0[128];
+const float s1 = 0.707;
+float s2[8] __attribute__ ((aligned (16)));
+float s3[8] __attribute__ ((aligned (16)));
+float s4[16] __attribute__ ((aligned (16)));
+float s5[16] __attribute__ ((aligned (16)));
+
+void
+foo (int k, float *x, float *y, const float *d, const float *z)
+{
+  float *a, *b, *c, *e;
+
+  a = x + 2 * k;
+  b = a + 2 * k;
+  c = b + 2 * k;
+  e = y + 2 * k;
+  __asm__ volatile (""
+                   : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
+                   : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
+                   : "memory");
+  for (;;)
+    {
+      __asm__ volatile (""
+                       :
+                       : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
+                       : "memory");
+      if (!--k)
+       break;
+    }
+  __asm__ volatile (""
+                   : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
+                   : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]),
+                     "m" (y[18]), "m" (s1)
+                   : "memory");
+}
+
+void
+bar (float *a)
+{
+  foo (4, a, a + 16, s2, s3);
+  foo (8, a, a + 32, s4, s5);
+}
+
+void
+baz (void)
+{
+  bar (s0);
+}
--- gcc/testsuite/gcc.target/i386/pr39543-2.c.jj        2009-03-25 16:40:18.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-2.c   2009-03-25 16:40:38.000000000 +0100
@@ -0,0 +1,51 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+float __attribute__ ((aligned (16))) s0[128];
+const float s1 = 0.707;
+float s2[8] __attribute__ ((aligned (16)));
+float s3[8] __attribute__ ((aligned (16)));
+float s4[16] __attribute__ ((aligned (16)));
+float s5[16] __attribute__ ((aligned (16)));
+
+void
+foo (int k, float *x, float *y, const float *d, const float *z)
+{
+  float *a, *b, *c, *e;
+
+  a = x + 2 * k;
+  b = a + 2 * k;
+  c = b + 2 * k;
+  e = y + 2 * k;
+  __asm__ volatile (""
+                   : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
+                   : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
+                   : "memory");
+  for (;;)
+    {
+      __asm__ volatile (""
+                       :
+                       : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
+                       : "memory");
+      if (!--k)
+       break;
+    }
+  __asm__ volatile (""
+                   : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
+                   : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]), "m" (s1)
+                   : "memory");
+}
+
+void
+bar (float *a)
+{
+  foo (4, a, a + 16, s2, s3);
+  foo (8, a, a + 32, s4, s5);
+}
+
+void
+baz (void)
+{
+  bar (s0);
+}
--- gcc/testsuite/gcc.target/i386/pr39543-3.c.jj        2009-03-25 16:41:29.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-3.c   2009-03-25 16:41:19.000000000 +0100
@@ -0,0 +1,42 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int s[128];
+
+void
+f1 (void)
+{
+  int i;
+  asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+               : "=r" (i)
+               : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
+                 "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
+                 "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
+                 "m" (s[30]), "m" (s[32]));
+  asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+               : "=r" (i)
+               : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
+                 "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
+                 "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
+                 "m" (s[30]), "m" (s[32]));
+}
+
+void
+f2 (int *q)
+{
+  int i;
+  int *p = q + 32;
+  asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+               : "=r" (i)
+               : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
+                 "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
+                 "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
+                 "m" (p[30]), "m" (p[32]));
+  asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+               : "=r" (i)
+               : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
+                 "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
+                 "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
+                 "m" (p[30]), "m" (p[32]));
+}


        Jakub

<Prev in Thread] Current Thread [Next in Thread>