mirror of https://github.com/gcc-mirror/gcc.git
Add widening expansion of MULT_HIGHPART_EXPR for integral modes
For integral modes the expansion of MULT_HIGHPART_EXPR requires the presence of an {s,u}mul_highpart optab whereas, for vector modes, widening expansion is supported. This adds a widening expansion for integral modes too, which is in fact already implemented in expmed_mult_highpart_optab.

gcc/
* expmed.h (expmed_mult_highpart_optab): Declare.
* expmed.cc (expmed_mult_highpart_optab): Remove static keyword. Do not assume that OP1 is a constant integer. Fix pasto.
(expmed_mult_highpart): Pass OP1 narrowed to MODE in all the calls to expmed_mult_highpart_optab.
* optabs-query.cc (can_mult_highpart_p): Use 2 for integer widening and shift subsequent values accordingly.
* optabs.cc (expand_mult_highpart): Call expmed_mult_highpart_optab when can_mult_highpart_p returns 2 and adjust to above change.

pull/91/merge
parent
b420e0b920
commit
f53f8a8596
|
@ -2748,8 +2748,7 @@ static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
|
|||
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
|
||||
static rtx extract_high_half (scalar_int_mode, rtx);
|
||||
static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
|
||||
static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
|
||||
int, int);
|
||||
|
||||
/* Compute and return the best algorithm for multiplying by T.
|
||||
The algorithm must cost less than cost_limit
|
||||
If retval.cost >= COST_LIMIT, no algorithm was found and all
|
||||
|
@ -3856,30 +3855,25 @@ extract_high_half (scalar_int_mode mode, rtx op)
|
|||
return convert_modes (mode, wider_mode, op, 0);
|
||||
}
|
||||
|
||||
/* Like expmed_mult_highpart, but only consider using a multiplication
|
||||
optab. OP1 is an rtx for the constant operand. */
|
||||
/* Like expmed_mult_highpart, but only consider using a multiplication optab. */
|
||||
|
||||
static rtx
|
||||
rtx
|
||||
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
|
||||
rtx target, int unsignedp, int max_cost)
|
||||
{
|
||||
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
|
||||
const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
|
||||
const bool speed = optimize_insn_for_speed_p ();
|
||||
const int size = GET_MODE_BITSIZE (mode);
|
||||
optab moptab;
|
||||
rtx tem;
|
||||
int size;
|
||||
bool speed = optimize_insn_for_speed_p ();
|
||||
|
||||
scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
|
||||
|
||||
size = GET_MODE_BITSIZE (mode);
|
||||
|
||||
/* Firstly, try using a multiplication insn that only generates the needed
|
||||
high part of the product, and in the sign flavor of unsignedp. */
|
||||
if (mul_highpart_cost (speed, mode) < max_cost)
|
||||
{
|
||||
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
|
||||
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
|
||||
unsignedp, OPTAB_DIRECT);
|
||||
tem = expand_binop (mode, moptab, op0, op1, target, unsignedp,
|
||||
OPTAB_DIRECT);
|
||||
if (tem)
|
||||
return tem;
|
||||
}
|
||||
|
@ -3892,12 +3886,12 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
+ 4 * add_cost (speed, mode) < max_cost))
|
||||
{
|
||||
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
|
||||
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
|
||||
unsignedp, OPTAB_DIRECT);
|
||||
tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp,
|
||||
OPTAB_DIRECT);
|
||||
if (tem)
|
||||
/* We used the wrong signedness. Adjust the result. */
|
||||
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
|
||||
tem, unsignedp);
|
||||
return expand_mult_highpart_adjust (mode, tem, op0, op1, tem,
|
||||
unsignedp);
|
||||
}
|
||||
|
||||
/* Try widening multiplication. */
|
||||
|
@ -3905,8 +3899,8 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
|
||||
&& mul_widen_cost (speed, wider_mode) < max_cost)
|
||||
{
|
||||
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
|
||||
unsignedp, OPTAB_WIDEN);
|
||||
tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp,
|
||||
OPTAB_WIDEN);
|
||||
if (tem)
|
||||
return extract_high_half (mode, tem);
|
||||
}
|
||||
|
@ -3947,14 +3941,14 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
+ 2 * shift_cost (speed, mode, size-1)
|
||||
+ 4 * add_cost (speed, mode) < max_cost))
|
||||
{
|
||||
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
|
||||
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
|
||||
tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp,
|
||||
OPTAB_WIDEN);
|
||||
if (tem != 0)
|
||||
{
|
||||
tem = extract_high_half (mode, tem);
|
||||
/* We used the wrong signedness. Adjust the result. */
|
||||
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
|
||||
target, unsignedp);
|
||||
return expand_mult_highpart_adjust (mode, tem, op0, op1, target,
|
||||
unsignedp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3976,18 +3970,19 @@ static rtx
|
|||
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
|
||||
rtx target, int unsignedp, int max_cost)
|
||||
{
|
||||
const bool speed = optimize_insn_for_speed_p ();
|
||||
unsigned HOST_WIDE_INT cnst1;
|
||||
int extra_cost;
|
||||
bool sign_adjust = false;
|
||||
enum mult_variant variant;
|
||||
struct algorithm alg;
|
||||
rtx tem;
|
||||
bool speed = optimize_insn_for_speed_p ();
|
||||
rtx narrow_op1, tem;
|
||||
|
||||
/* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
|
||||
gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
|
||||
|
||||
cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
|
||||
narrow_op1 = gen_int_mode (INTVAL (op1), mode);
|
||||
|
||||
/* We can't optimize modes wider than BITS_PER_WORD.
|
||||
??? We might be able to perform double-word arithmetic if
|
||||
|
@ -3995,7 +3990,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
synth_mult etc. assume single-word operations. */
|
||||
scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
|
||||
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
|
||||
return expmed_mult_highpart_optab (mode, op0, op1, target,
|
||||
return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
|
||||
unsignedp, max_cost);
|
||||
|
||||
extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
|
||||
|
@ -4013,7 +4008,8 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
{
|
||||
/* See whether the specialized multiplication optabs are
|
||||
cheaper than the shift/add version. */
|
||||
tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
|
||||
tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
|
||||
unsignedp,
|
||||
alg.cost.cost + extra_cost);
|
||||
if (tem)
|
||||
return tem;
|
||||
|
@ -4028,7 +4024,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
|
|||
|
||||
return tem;
|
||||
}
|
||||
return expmed_mult_highpart_optab (mode, op0, op1, target,
|
||||
return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
|
||||
unsignedp, max_cost);
|
||||
}
|
||||
|
||||
|
|
|
@ -724,5 +724,7 @@ extern rtx extract_low_bits (machine_mode, machine_mode, rtx);
|
|||
extern rtx expand_mult (machine_mode, rtx, rtx, rtx, int, bool = false);
|
||||
extern rtx expand_mult_highpart_adjust (scalar_int_mode, rtx, rtx, rtx,
|
||||
rtx, int);
|
||||
extern rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
|
||||
int, int);
|
||||
|
||||
#endif // EXPMED_H
|
||||
|
|
|
@ -502,19 +502,35 @@ find_widening_optab_handler_and_mode (optab op, machine_mode to_mode,
|
|||
return CODE_FOR_nothing;
|
||||
}
|
||||
|
||||
/* Return non-zero if a highpart multiply is supported of can be synthisized.
|
||||
/* Return non-zero if a highpart multiply is supported or can be synthesized.
|
||||
For the benefit of expand_mult_highpart, the return value is 1 for direct,
|
||||
2 for even/odd widening, and 3 for hi/lo widening. */
|
||||
2 for integral widening, 3 for even/odd widening, 4 for hi/lo widening. */
|
||||
|
||||
int
|
||||
can_mult_highpart_p (machine_mode mode, bool uns_p)
|
||||
{
|
||||
optab op;
|
||||
scalar_int_mode int_mode;
|
||||
|
||||
op = uns_p ? umul_highpart_optab : smul_highpart_optab;
|
||||
if (optab_handler (op, mode) != CODE_FOR_nothing)
|
||||
return 1;
|
||||
|
||||
/* If the mode is integral, synth from widening or larger operations. */
|
||||
if (is_a <scalar_int_mode> (mode, &int_mode))
|
||||
{
|
||||
scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (int_mode).require ();
|
||||
|
||||
op = uns_p ? umul_widen_optab : smul_widen_optab;
|
||||
if (convert_optab_handler (op, wider_mode, mode) != CODE_FOR_nothing)
|
||||
return 2;
|
||||
|
||||
/* The test on the size comes from expmed_mult_highpart_optab. */
|
||||
if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
|
||||
&& GET_MODE_BITSIZE (int_mode) - 1 < BITS_PER_WORD)
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* If the mode is an integral vector, synth from widening operations. */
|
||||
if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
|
||||
return 0;
|
||||
|
@ -535,7 +551,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
|
|||
+ ((i & 1) ? nunits : 0));
|
||||
vec_perm_indices indices (sel, 2, nunits);
|
||||
if (can_vec_perm_const_p (mode, mode, indices))
|
||||
return 2;
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -551,7 +567,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
|
|||
sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
|
||||
vec_perm_indices indices (sel, 2, nunits);
|
||||
if (can_vec_perm_const_p (mode, mode, indices))
|
||||
return 3;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6751,10 +6751,13 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
|
|||
return expand_binop (mode, tab1, op0, op1, target, uns_p,
|
||||
OPTAB_LIB_WIDEN);
|
||||
case 2:
|
||||
return expmed_mult_highpart_optab (as_a <scalar_int_mode> (mode),
|
||||
op0, op1, target, uns_p, INT_MAX);
|
||||
case 3:
|
||||
tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
|
||||
tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
|
||||
break;
|
||||
case 3:
|
||||
case 4:
|
||||
tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
|
||||
tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
|
@ -6783,7 +6786,7 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
|
|||
m2 = gen_lowpart (mode, eops[0].value);
|
||||
|
||||
vec_perm_builder sel;
|
||||
if (method == 2)
|
||||
if (method == 3)
|
||||
{
|
||||
/* The encoding has 2 interleaved stepped patterns. */
|
||||
sel.new_vector (GET_MODE_NUNITS (mode), 2, 3);
|
||||
|
|
Loading…
Reference in New Issue