308 lines
10 KiB
Diff
308 lines
10 KiB
Diff
|
From 536b8318974495cde2b42c3c2742748e2b271be0 Mon Sep 17 00:00:00 2001
|
|||
|
From: ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>
|
|||
|
Date: Wed, 27 May 2015 13:25:01 +0000
|
|||
|
Subject: [PATCH] PR target/65358 Avoid clobbering partial argument during
|
|||
|
sibcall
|
|||
|
|
|||
|
PR target/65358
|
|||
|
* expr.c (memory_load_overlap): New function.
|
|||
|
(emit_push_insn): When pushing partial args to the stack would
|
|||
|
clobber the register part, load the overlapping part into a pseudo
|
|||
|
and put it into the hard reg after pushing. Change return type
|
|||
|
to bool. Add bool argument.
|
|||
|
* expr.h (emit_push_insn): Change return type to bool.
|
|||
|
Add bool argument.
|
|||
|
* calls.c (expand_call): Cancel sibcall optimization when encountering
|
|||
|
partial argument on targets with ARGS_GROW_DOWNWARD and
|
|||
|
!STACK_GROWS_DOWNWARD.
|
|||
|
(emit_library_call_value_1): Update callsite of emit_push_insn.
|
|||
|
(store_one_arg): Likewise.
|
|||
|
|
|||
|
PR target/65358
|
|||
|
* gcc.dg/pr65358.c: New test.
|
|||
|
|
|||
|
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223753 138bc75d-0d04-0410-961f-82ee72b054a4
|
|||
|
|
|||
|
Upstream-Status: Backport from 6.0
|
|||
|
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
|
|||
|
---
|
|||
|
gcc/calls.c | 17 ++++++--
|
|||
|
gcc/expr.c | 90 +++++++++++++++++++++++++++++++++++++-----
|
|||
|
gcc/expr.h | 4 +-
|
|||
|
gcc/testsuite/gcc.dg/pr65358.c | 33 ++++++++++++++++
|
|||
|
4 files changed, 129 insertions(+), 15 deletions(-)
|
|||
|
create mode 100644 gcc/testsuite/gcc.dg/pr65358.c
|
|||
|
|
|||
|
diff --git a/gcc/calls.c b/gcc/calls.c
|
|||
|
index ee8ea5f..2334381 100644
|
|||
|
--- a/gcc/calls.c
|
|||
|
+++ b/gcc/calls.c
|
|||
|
@@ -3236,6 +3236,14 @@ expand_call (tree exp, rtx target, int ignore)
|
|||
|
{
|
|||
|
rtx_insn *before_arg = get_last_insn ();
|
|||
|
|
|||
|
+ /* On targets with weird calling conventions (e.g. PA) it's
|
|||
|
+ hard to ensure that all cases of argument overlap between
|
|||
|
+ stack and registers work. Play it safe and bail out. */
|
|||
|
+#if defined(ARGS_GROW_DOWNWARD) && !defined(STACK_GROWS_DOWNWARD)
|
|||
|
+ sibcall_failure = 1;
|
|||
|
+ break;
|
|||
|
+#endif
|
|||
|
+
|
|||
|
if (store_one_arg (&args[i], argblock, flags,
|
|||
|
adjusted_args_size.var != 0,
|
|||
|
reg_parm_stack_space)
|
|||
|
@@ -4279,7 +4287,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
|
|||
|
partial, reg, 0, argblock,
|
|||
|
GEN_INT (argvec[argnum].locate.offset.constant),
|
|||
|
reg_parm_stack_space,
|
|||
|
- ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad));
|
|||
|
+ ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad), false);
|
|||
|
|
|||
|
/* Now mark the segment we just used. */
|
|||
|
if (ACCUMULATE_OUTGOING_ARGS)
|
|||
|
@@ -4886,10 +4894,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
|
|||
|
|
|||
|
/* This isn't already where we want it on the stack, so put it there.
|
|||
|
This can either be done with push or copy insns. */
|
|||
|
- emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
|
|||
|
+ if (!emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
|
|||
|
parm_align, partial, reg, used - size, argblock,
|
|||
|
ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
|
|||
|
- ARGS_SIZE_RTX (arg->locate.alignment_pad));
|
|||
|
+ ARGS_SIZE_RTX (arg->locate.alignment_pad), true))
|
|||
|
+ sibcall_failure = 1;
|
|||
|
|
|||
|
/* Unless this is a partially-in-register argument, the argument is now
|
|||
|
in the stack. */
|
|||
|
@@ -5001,7 +5010,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
|
|||
|
emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx,
|
|||
|
parm_align, partial, reg, excess, argblock,
|
|||
|
ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
|
|||
|
- ARGS_SIZE_RTX (arg->locate.alignment_pad));
|
|||
|
+ ARGS_SIZE_RTX (arg->locate.alignment_pad), false);
|
|||
|
|
|||
|
/* Unless this is a partially-in-register argument, the argument is now
|
|||
|
in the stack.
|
|||
|
diff --git a/gcc/expr.c b/gcc/expr.c
|
|||
|
index 5c09550..24a6293 100644
|
|||
|
--- a/gcc/expr.c
|
|||
|
+++ b/gcc/expr.c
|
|||
|
@@ -4121,12 +4121,35 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
|
|||
|
}
|
|||
|
#endif
|
|||
|
|
|||
|
+/* If reading SIZE bytes from X will end up reading from
|
|||
|
+ Y, return the number of bytes that overlap. Return -1
|
|||
|
+ if there is no overlap or -2 if we can't determine
|
|||
|
+ (for example when X and Y have different base registers). */
|
|||
|
+
|
|||
|
+static int
|
|||
|
+memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size)
|
|||
|
+{
|
|||
|
+ rtx tmp = plus_constant (Pmode, x, size);
|
|||
|
+ rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y);
|
|||
|
+
|
|||
|
+ if (!CONST_INT_P (sub))
|
|||
|
+ return -2;
|
|||
|
+
|
|||
|
+ HOST_WIDE_INT val = INTVAL (sub);
|
|||
|
+
|
|||
|
+ return IN_RANGE (val, 1, size) ? val : -1;
|
|||
|
+}
|
|||
|
+
|
|||
|
/* Generate code to push X onto the stack, assuming it has mode MODE and
|
|||
|
type TYPE.
|
|||
|
MODE is redundant except when X is a CONST_INT (since they don't
|
|||
|
carry mode info).
|
|||
|
SIZE is an rtx for the size of data to be copied (in bytes),
|
|||
|
needed only if X is BLKmode.
|
|||
|
+ Return true if successful. May return false if asked to push a
|
|||
|
+ partial argument during a sibcall optimization (as specified by
|
|||
|
+ SIBCALL_P) and the incoming and outgoing pointers cannot be shown
|
|||
|
+ to not overlap.
|
|||
|
|
|||
|
ALIGN (in bits) is maximum alignment we can assume.
|
|||
|
|
|||
|
@@ -4152,11 +4175,11 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
|
|||
|
for arguments passed in registers. If nonzero, it will be the number
|
|||
|
of bytes required. */
|
|||
|
|
|||
|
-void
|
|||
|
+bool
|
|||
|
emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
unsigned int align, int partial, rtx reg, int extra,
|
|||
|
rtx args_addr, rtx args_so_far, int reg_parm_stack_space,
|
|||
|
- rtx alignment_pad)
|
|||
|
+ rtx alignment_pad, bool sibcall_p)
|
|||
|
{
|
|||
|
rtx xinner;
|
|||
|
enum direction stack_direction
|
|||
|
@@ -4179,6 +4202,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
|
|||
|
xinner = x;
|
|||
|
|
|||
|
+ int nregs = partial / UNITS_PER_WORD;
|
|||
|
+ rtx *tmp_regs = NULL;
|
|||
|
+ int overlapping = 0;
|
|||
|
+
|
|||
|
if (mode == BLKmode
|
|||
|
|| (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode)))
|
|||
|
{
|
|||
|
@@ -4309,6 +4336,43 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
PARM_BOUNDARY. Assume the caller isn't lying. */
|
|||
|
set_mem_align (target, align);
|
|||
|
|
|||
|
+ /* If part should go in registers and pushing to that part would
|
|||
|
+ overwrite some of the values that need to go into regs, load the
|
|||
|
+ overlapping values into temporary pseudos to be moved into the hard
|
|||
|
+ regs at the end after the stack pushing has completed.
|
|||
|
+ We cannot load them directly into the hard regs here because
|
|||
|
+ they can be clobbered by the block move expansions.
|
|||
|
+ See PR 65358. */
|
|||
|
+
|
|||
|
+ if (partial > 0 && reg != 0 && mode == BLKmode
|
|||
|
+ && GET_CODE (reg) != PARALLEL)
|
|||
|
+ {
|
|||
|
+ overlapping = memory_load_overlap (XEXP (x, 0), temp, partial);
|
|||
|
+ if (overlapping > 0)
|
|||
|
+ {
|
|||
|
+ gcc_assert (overlapping % UNITS_PER_WORD == 0);
|
|||
|
+ overlapping /= UNITS_PER_WORD;
|
|||
|
+
|
|||
|
+ tmp_regs = XALLOCAVEC (rtx, overlapping);
|
|||
|
+
|
|||
|
+ for (int i = 0; i < overlapping; i++)
|
|||
|
+ tmp_regs[i] = gen_reg_rtx (word_mode);
|
|||
|
+
|
|||
|
+ for (int i = 0; i < overlapping; i++)
|
|||
|
+ emit_move_insn (tmp_regs[i],
|
|||
|
+ operand_subword_force (target, i, mode));
|
|||
|
+ }
|
|||
|
+ else if (overlapping == -1)
|
|||
|
+ overlapping = 0;
|
|||
|
+ /* Could not determine whether there is overlap.
|
|||
|
+ Fail the sibcall. */
|
|||
|
+ else
|
|||
|
+ {
|
|||
|
+ overlapping = 0;
|
|||
|
+ if (sibcall_p)
|
|||
|
+ return false;
|
|||
|
+ }
|
|||
|
+ }
|
|||
|
emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM);
|
|||
|
}
|
|||
|
}
|
|||
|
@@ -4363,12 +4427,13 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
has a size a multiple of a word. */
|
|||
|
for (i = size - 1; i >= not_stack; i--)
|
|||
|
if (i >= not_stack + offset)
|
|||
|
- emit_push_insn (operand_subword_force (x, i, mode),
|
|||
|
+ if (!emit_push_insn (operand_subword_force (x, i, mode),
|
|||
|
word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX,
|
|||
|
0, args_addr,
|
|||
|
GEN_INT (args_offset + ((i - not_stack + skip)
|
|||
|
* UNITS_PER_WORD)),
|
|||
|
- reg_parm_stack_space, alignment_pad);
|
|||
|
+ reg_parm_stack_space, alignment_pad, sibcall_p))
|
|||
|
+ return false;
|
|||
|
}
|
|||
|
else
|
|||
|
{
|
|||
|
@@ -4411,9 +4476,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
- /* If part should go in registers, copy that part
|
|||
|
- into the appropriate registers. Do this now, at the end,
|
|||
|
- since mem-to-mem copies above may do function calls. */
|
|||
|
+ /* Move the partial arguments into the registers and any overlapping
|
|||
|
+ values that we moved into the pseudos in tmp_regs. */
|
|||
|
if (partial > 0 && reg != 0)
|
|||
|
{
|
|||
|
/* Handle calls that pass values in multiple non-contiguous locations.
|
|||
|
@@ -4421,9 +4485,15 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
if (GET_CODE (reg) == PARALLEL)
|
|||
|
emit_group_load (reg, x, type, -1);
|
|||
|
else
|
|||
|
- {
|
|||
|
+ {
|
|||
|
gcc_assert (partial % UNITS_PER_WORD == 0);
|
|||
|
- move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode);
|
|||
|
+ move_block_to_reg (REGNO (reg), x, nregs - overlapping, mode);
|
|||
|
+
|
|||
|
+ for (int i = 0; i < overlapping; i++)
|
|||
|
+ emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg)
|
|||
|
+ + nregs - overlapping + i),
|
|||
|
+ tmp_regs[i]);
|
|||
|
+
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
@@ -4432,6 +4502,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
|
|||
|
|
|||
|
if (alignment_pad && args_addr == 0)
|
|||
|
anti_adjust_stack (alignment_pad);
|
|||
|
+
|
|||
|
+ return true;
|
|||
|
}
|
|||
|
|
|||
|
/* Return X if X can be used as a subtarget in a sequence of arithmetic
|
|||
|
diff --git a/gcc/expr.h b/gcc/expr.h
|
|||
|
index 867852e..5fcc13f 100644
|
|||
|
--- a/gcc/expr.h
|
|||
|
+++ b/gcc/expr.h
|
|||
|
@@ -218,8 +218,8 @@ extern rtx emit_move_resolve_push (machine_mode, rtx);
|
|||
|
extern rtx push_block (rtx, int, int);
|
|||
|
|
|||
|
/* Generate code to push something onto the stack, given its mode and type. */
|
|||
|
-extern void emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
|
|||
|
- int, rtx, int, rtx, rtx, int, rtx);
|
|||
|
+extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
|
|||
|
+ int, rtx, int, rtx, rtx, int, rtx, bool);
|
|||
|
|
|||
|
/* Expand an assignment that stores the value of FROM into TO. */
|
|||
|
extern void expand_assignment (tree, tree, bool);
|
|||
|
diff --git a/gcc/testsuite/gcc.dg/pr65358.c b/gcc/testsuite/gcc.dg/pr65358.c
|
|||
|
new file mode 100644
|
|||
|
index 0000000..ba89fd4
|
|||
|
--- /dev/null
|
|||
|
+++ b/gcc/testsuite/gcc.dg/pr65358.c
|
|||
|
@@ -0,0 +1,33 @@
|
|||
|
+/* { dg-do run } */
|
|||
|
+/* { dg-options "-O2" } */
|
|||
|
+
|
|||
|
+struct pack
|
|||
|
+{
|
|||
|
+ int fine;
|
|||
|
+ int victim;
|
|||
|
+ int killer;
|
|||
|
+};
|
|||
|
+
|
|||
|
+int __attribute__ ((__noinline__, __noclone__))
|
|||
|
+bar (int a, int b, struct pack p)
|
|||
|
+{
|
|||
|
+ if (a != 20 || b != 30)
|
|||
|
+ __builtin_abort ();
|
|||
|
+ if (p.fine != 40 || p.victim != 50 || p.killer != 60)
|
|||
|
+ __builtin_abort ();
|
|||
|
+ return 0;
|
|||
|
+}
|
|||
|
+
|
|||
|
+int __attribute__ ((__noinline__, __noclone__))
|
|||
|
+foo (int arg1, int arg2, int arg3, struct pack p)
|
|||
|
+{
|
|||
|
+ return bar (arg2, arg3, p);
|
|||
|
+}
|
|||
|
+
|
|||
|
+int main (void)
|
|||
|
+{
|
|||
|
+ struct pack p = { 40, 50, 60 };
|
|||
|
+
|
|||
|
+ (void) foo (10, 20, 30, p);
|
|||
|
+ return 0;
|
|||
|
+}
|
|||
|
--
|
|||
|
2.7.0
|
|||
|
|