ICF&SRA: Make ICF and SRA agree on padding

PR 113359 shows that (at least with -fno-strict-aliasing) ICF can
unify two functions which copy an aggregate type of the same size,
but then SRA, through its total scalarization, can copy the aggregate
by pieces, skipping padding, even though the padding was not the same
in the two original functions that ICF unified.

This patch enhances SRA with the ability to collect padding
information which can then be compared from within ICF.  Unfortunately
SRA uses OPTION_SET_P when determining its limits, so ICF needs to
switch cfuns at least once to figure out the limit too.
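
As a minimal illustration (distilled from the new testcases; the
names here are made up for this sketch), consider two structures of
the same size whose padding sits at different offsets on an lp64
target:

  struct A { int i; long l; };   /* padding occupies bits [32, 64)  */
  struct B { long l; int i; };   /* padding occupies bits [96, 128) */

  void copy_a (struct A *d, struct A *s) { struct A t = *s; *d = t; }
  void copy_b (struct B *d, struct B *s) { struct B t = *s; *d = t; }

As block copies, the two functions move the same 16 bytes, so ICF may
unify them.  But when SRA then totally scalarizes the temporary in
the single surviving body, it copies only the fields of one of the
types and never writes the bits which are padding in that type but
data in the other.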

gcc/ChangeLog:

2024-03-27  Martin Jambor  <mjambor@suse.cz>

	PR ipa/113359
	* ipa-icf-gimple.h (func_checker): New members
	safe_for_total_scalarization_p, m_total_scalarization_limit_known_p
	and m_total_scalarization_limit.
	(func_checker::func_checker): Initialize new member variables.
	* ipa-icf-gimple.cc: Include tree-sra.h.
	(func_checker::func_checker): Initialize new member variables.
	(func_checker::safe_for_total_scalarization_p): New function.
	(func_checker::compare_operand): Use the new function.
	* tree-sra.h (sra_get_max_scalarization_size): Declare.
	(sra_total_scalarization_would_copy_same_data_p): Likewise.
	* tree-sra.cc (prepare_iteration_over_array_elts): New function.
	(class sra_padding_collecting): New.
	(sra_padding_collecting::record_padding): Likewise.
	(scalarizable_type_p): Rename to totally_scalarizable_type_p.  Add
	ability to record padding when requested.
	(totally_scalarize_subtree): Split out gathering information necessary
	to iterate over array elements to prepare_iteration_over_array_elts.
	Fix erroneous early exit.
	(analyze_all_variable_accesses): Adjust the call to
	totally_scalarizable_type_p.  Move determining of total scalarization
	size limit...
	(sra_get_max_scalarization_size): ...here.
	(check_ts_and_push_padding_to_vec): New function.
	(sra_total_scalarization_would_copy_same_data_p): Likewise.

gcc/testsuite/ChangeLog:

2024-03-27  Martin Jambor  <mjambor@suse.cz>

	PR ipa/113359
	* gcc.dg/lto/pr113359-1_0.c: New.
	* gcc.dg/lto/pr113359-1_1.c: Likewise.
	* gcc.dg/lto/pr113359-2_0.c: Likewise.
	* gcc.dg/lto/pr113359-2_1.c: Likewise.
	* gcc.dg/lto/pr113359-3_0.c: Likewise.
	* gcc.dg/lto/pr113359-3_1.c: Likewise.
	* gcc.dg/lto/pr113359-4_0.c: Likewise.
	* gcc.dg/lto/pr113359-4_1.c: Likewise.
	* gcc.dg/lto/pr113359-5_0.c: Likewise.
	* gcc.dg/lto/pr113359-5_1.c: Likewise.

diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "attribs.h"
#include "gimple-walk.h"
#include "tree-sra.h"
#include "tree-ssa-alias-compare.h"
#include "alloc-pool.h"
@@ -64,7 +65,8 @@ func_checker::func_checker (tree source_func_decl, tree target_func_decl,
: m_source_func_decl (source_func_decl), m_target_func_decl (target_func_decl),
m_ignored_source_nodes (ignored_source_nodes),
m_ignored_target_nodes (ignored_target_nodes),
m_ignore_labels (ignore_labels), m_tbaa (tbaa)
m_ignore_labels (ignore_labels), m_tbaa (tbaa),
m_total_scalarization_limit_known_p (false)
{
function *source_func = DECL_STRUCT_FUNCTION (source_func_decl);
function *target_func = DECL_STRUCT_FUNCTION (target_func_decl);
@@ -361,6 +363,36 @@ func_checker::operand_equal_p (const_tree t1, const_tree t2,
return operand_compare::operand_equal_p (t1, t2, flags);
}
/* Return true if either T1 or T2 cannot be totally scalarized or if doing
so would result in copying the same memory.  Otherwise return false. */
bool
func_checker::safe_for_total_scalarization_p (tree t1, tree t2)
{
tree type1 = TREE_TYPE (t1);
tree type2 = TREE_TYPE (t2);
if (!AGGREGATE_TYPE_P (type1)
|| !AGGREGATE_TYPE_P (type2)
|| !tree_fits_uhwi_p (TYPE_SIZE (type1))
|| !tree_fits_uhwi_p (TYPE_SIZE (type2)))
return true;
if (!m_total_scalarization_limit_known_p)
{
push_cfun (DECL_STRUCT_FUNCTION (m_target_func_decl));
m_total_scalarization_limit = sra_get_max_scalarization_size ();
pop_cfun ();
m_total_scalarization_limit_known_p = true;
}
unsigned HOST_WIDE_INT sz = tree_to_uhwi (TYPE_SIZE (type1));
gcc_assert (sz == tree_to_uhwi (TYPE_SIZE (type2)));
if (sz > m_total_scalarization_limit)
return true;
return sra_total_scalarization_would_copy_same_data_p (type1, type2);
}
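
/* For instance, with the aggregates from the new pr113359-1 testcase on an
lp64 target, struct SA { unsigned int ax; unsigned long ay;
unsigned long az; } and struct SB { unsigned long bx; unsigned int by;
unsigned long bz; } are both 192 bits in size, but SA has padding at bits
[32, 64) whereas SB has it at [96, 128), so the function above returns
false and compare_operand below rejects the merge. */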
/* Function responsible for comparison of various operands T1 and T2
which are accessed as ACCESS.
If these components, from functions FUNC1 and FUNC2, are equal, true
@@ -382,7 +414,12 @@ func_checker::compare_operand (tree t1, tree t2, operand_access_type access)
lto_streaming_expected_p (), m_tbaa);
if (!flags)
return true;
{
if (!safe_for_total_scalarization_p (t1, t2))
return return_false_with_msg
("total scalarization may not be equivalent");
return true;
}
if (flags & SEMANTICS)
return return_false_with_msg
("compare_ao_refs failed (semantic difference)");

diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
@@ -125,7 +125,8 @@ public:
func_checker ():
m_source_func_decl (NULL_TREE), m_target_func_decl (NULL_TREE),
m_ignored_source_nodes (NULL), m_ignored_target_nodes (NULL),
m_ignore_labels (false), m_tbaa (true)
m_ignore_labels (false), m_tbaa (true),
m_total_scalarization_limit_known_p (false)
{
m_source_ssa_names.create (0);
m_target_ssa_names.create (0);
@@ -205,6 +206,10 @@ public:
enum operand_access_type {OP_MEMORY, OP_NORMAL};
typedef hash_set<tree> operand_access_type_map;
/* Return true if either T1 or T2 cannot be totally scalarized or if doing
so would result in copying the same memory.  Otherwise return false. */
bool safe_for_total_scalarization_p (tree t1, tree t2);
/* Function responsible for comparison of various operands T1 and T2.
If these components, from functions FUNC1 and FUNC2, are equal, true
is returned. */
@@ -279,6 +284,14 @@ private:
/* Flag if we should compare type based alias analysis info. */
bool m_tbaa;
/* Set to true when the total scalarization size limit has already been
determined for the functions. */
bool m_total_scalarization_limit_known_p;
/* When the above is set to true, the determined total scalarization
limit. */
unsigned HOST_WIDE_INT m_total_scalarization_limit;
public:
/* Return true if two operands are equal. The flags fields can be used
to specify OEP flags described above. */

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c
@@ -0,0 +1,86 @@
/* { dg-lto-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp -fdisable-tree-esra }} } */
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct SA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SB
{
unsigned long bx;
unsigned int by;
unsigned long bz;
};
struct ZA
{
int p;
struct SA s;
short q;
};
struct ZB
{
int p;
struct SB s;
short q;
};
void __attribute__((noinline))
geta (struct SA *d, struct ZA *p)
{
struct SA tmp = p->s;
*d = tmp;
}
void getb (struct SB *d, struct ZB *p);
struct ZA ga;
struct ZB gb;
void __attribute__((noipa))
init (void)
{
ga.s.ax = CI;
ga.s.ay = CL1;
ga.s.az = CL2;
gb.s.bx = CL1;
gb.s.by = CI;
gb.s.bz = CL2;
}
int
main (int argc, char **argv)
{
init();
struct SA a;
geta (&a, &ga);
if (a.ax != CI)
__builtin_abort ();
if (a.ay != CL1)
__builtin_abort ();
if (a.az != CL2)
__builtin_abort ();
struct SB b;
getb (&b, &gb);
if (b.bx != CL1)
__builtin_abort ();
if (b.by != CI)
__builtin_abort ();
if (b.bz != CL2)
__builtin_abort ();
return 0;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c
@@ -0,0 +1,38 @@
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct SA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SB
{
unsigned long bx;
unsigned int by;
unsigned long bz;
};
struct ZA
{
int p;
struct SA s;
short q;
};
struct ZB
{
int p;
struct SB s;
short q;
};
void __attribute__((noinline))
getb (struct SB *d, struct ZB *p)
{
struct SB tmp = p->s;
*d = tmp;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c
@@ -0,0 +1,87 @@
/* { dg-lto-do run } */
/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp -fdisable-tree-esra -fdump-ipa-icf-details }} } */
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct SA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SB
{
unsigned int bx;
unsigned long by;
unsigned long bz;
};
struct ZA
{
int p;
struct SA s;
short q;
};
struct ZB
{
int p;
struct SB s;
short q;
};
void __attribute__((noinline))
geta (struct SA *d, struct ZA *p)
{
struct SA tmp = p->s;
*d = tmp;
}
void getb (struct SB *d, struct ZB *p);
struct ZA ga;
struct ZB gb;
void __attribute__((noipa))
init (void)
{
ga.s.ax = CI;
ga.s.ay = CL1;
ga.s.az = CL2;
gb.s.bx = CI;
gb.s.by = CL1;
gb.s.bz = CL2;
}
int
main (int argc, char **argv)
{
init();
struct SA a;
geta (&a, &ga);
if (a.ax != CI)
__builtin_abort ();
if (a.ay != CL1)
__builtin_abort ();
if (a.az != CL2)
__builtin_abort ();
struct SB b;
getb (&b, &gb);
if (b.bx != CI)
__builtin_abort ();
if (b.by != CL1)
__builtin_abort ();
if (b.bz != CL2)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c
@@ -0,0 +1,38 @@
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct SA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SB
{
unsigned int bx;
unsigned long by;
unsigned long bz;
};
struct ZA
{
int p;
struct SA s;
short q;
};
struct ZB
{
int p;
struct SB s;
short q;
};
void __attribute__((noinline))
getb (struct SB *d, struct ZB *p)
{
struct SB tmp = p->s;
*d = tmp;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c
@@ -0,0 +1,114 @@
/* { dg-lto-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp -fdisable-tree-esra }} } */
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned long bx;
unsigned int by;
unsigned long bz;
};
struct SB
{
int p;
struct AB arr[2];
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
geta (struct SA *d, struct ZA *p)
{
struct SA tmp = p->s;
*d = tmp;
}
void getb (struct SB *d, struct ZB *p);
struct ZA ga;
struct ZB gb;
void __attribute__((noipa))
init (void)
{
ga.s.arr[0].ax = CI;
ga.s.arr[0].ay = CL1;
ga.s.arr[0].az = CL2;
ga.s.arr[1].ax = CI;
ga.s.arr[1].ay = CL1;
ga.s.arr[1].az = CL2;
gb.s.arr[0].bx = CL1;
gb.s.arr[0].by = CI;
gb.s.arr[0].bz = CL2;
gb.s.arr[1].bx = CL1;
gb.s.arr[1].by = CI;
gb.s.arr[1].bz = CL2;
}
int
main (int argc, char **argv)
{
init();
struct SA a;
geta (&a, &ga);
if (a.arr[0].ax != CI)
__builtin_abort ();
if (a.arr[0].ay != CL1)
__builtin_abort ();
if (a.arr[0].az != CL2)
__builtin_abort ();
if (a.arr[1].ax != CI)
__builtin_abort ();
if (a.arr[1].ay != CL1)
__builtin_abort ();
if (a.arr[1].az != CL2)
__builtin_abort ();
struct SB b;
getb (&b, &gb);
if (b.arr[0].bx != CL1)
__builtin_abort ();
if (b.arr[0].by != CI)
__builtin_abort ();
if (b.arr[0].bz != CL2)
__builtin_abort ();
if (b.arr[1].bx != CL1)
__builtin_abort ();
if (b.arr[1].by != CI)
__builtin_abort ();
if (b.arr[1].bz != CL2)
__builtin_abort ();
return 0;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c
@@ -0,0 +1,49 @@
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned long bx;
unsigned int by;
unsigned long bz;
};
struct SB
{
int p;
struct AB arr[2];
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
getb (struct SB *d, struct ZB *p)
{
struct SB tmp = p->s;
*d = tmp;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c
@@ -0,0 +1,114 @@
/* { dg-lto-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp -fdisable-tree-esra }} } */
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned long bx;
unsigned long by;
unsigned int bz;
};
struct SB
{
int p;
struct AB arr[2];
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
geta (struct SA *d, struct ZA *p)
{
struct SA tmp = p->s;
*d = tmp;
}
void getb (struct SB *d, struct ZB *p);
struct ZA ga;
struct ZB gb;
void __attribute__((noipa))
init (void)
{
ga.s.arr[0].ax = CI;
ga.s.arr[0].ay = CL1;
ga.s.arr[0].az = CL2;
ga.s.arr[1].ax = CI;
ga.s.arr[1].ay = CL1;
ga.s.arr[1].az = CL2;
gb.s.arr[0].bx = CL1;
gb.s.arr[0].by = CL2;
gb.s.arr[0].bz = CI;
gb.s.arr[1].bx = CL1;
gb.s.arr[1].by = CL2;
gb.s.arr[1].bz = CI;
}
int
main (int argc, char **argv)
{
init();
struct SA a;
geta (&a, &ga);
if (a.arr[0].ax != CI)
__builtin_abort ();
if (a.arr[0].ay != CL1)
__builtin_abort ();
if (a.arr[0].az != CL2)
__builtin_abort ();
if (a.arr[1].ax != CI)
__builtin_abort ();
if (a.arr[1].ay != CL1)
__builtin_abort ();
if (a.arr[1].az != CL2)
__builtin_abort ();
struct SB b;
getb (&b, &gb);
if (b.arr[0].bx != CL1)
__builtin_abort ();
if (b.arr[0].by != CL2)
__builtin_abort ();
if (b.arr[0].bz != CI)
__builtin_abort ();
if (b.arr[1].bx != CL1)
__builtin_abort ();
if (b.arr[1].by != CL2)
__builtin_abort ();
if (b.arr[1].bz != CI)
__builtin_abort ();
return 0;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c
@@ -0,0 +1,49 @@
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned long bx;
unsigned long by;
unsigned int bz;
};
struct SB
{
int p;
struct AB arr[2];
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
getb (struct SB *d, struct ZB *p)
{
struct SB tmp = p->s;
*d = tmp;
}

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c
@@ -0,0 +1,118 @@
/* { dg-lto-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp -fdisable-tree-esra -fdump-ipa-icf-details }} } */
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
short ee;
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned int bx;
unsigned long by;
unsigned long bz;
};
struct SB
{
int p;
struct AB arr[2];
short ee;
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
geta (struct SA *d, struct ZA *p)
{
struct SA tmp = p->s;
*d = tmp;
}
void getb (struct SB *d, struct ZB *p);
struct ZA ga;
struct ZB gb;
void __attribute__((noipa))
init (void)
{
ga.s.arr[0].ax = CI;
ga.s.arr[0].ay = CL1;
ga.s.arr[0].az = CL2;
ga.s.arr[1].ax = CI;
ga.s.arr[1].ay = CL1;
ga.s.arr[1].az = CL2;
gb.s.arr[0].bx = CI;
gb.s.arr[0].by = CL1;
gb.s.arr[0].bz = CL2;
gb.s.arr[1].bx = CI;
gb.s.arr[1].by = CL1;
gb.s.arr[1].bz = CL2;
}
int
main (int argc, char **argv)
{
init();
struct SA a;
geta (&a, &ga);
if (a.arr[0].ax != CI)
__builtin_abort ();
if (a.arr[0].ay != CL1)
__builtin_abort ();
if (a.arr[0].az != CL2)
__builtin_abort ();
if (a.arr[1].ax != CI)
__builtin_abort ();
if (a.arr[1].ay != CL1)
__builtin_abort ();
if (a.arr[1].az != CL2)
__builtin_abort ();
struct SB b;
getb (&b, &gb);
if (b.arr[0].bx != CI)
__builtin_abort ();
if (b.arr[0].by != CL1)
__builtin_abort ();
if (b.arr[0].bz != CL2)
__builtin_abort ();
if (b.arr[1].bx != CI)
__builtin_abort ();
if (b.arr[1].by != CL1)
__builtin_abort ();
if (b.arr[1].bz != CL2)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */

diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c
@@ -0,0 +1,50 @@
#define CI 0xdeadbeef
#define CL1 0xdeaddead1234beef
#define CL2 0xdead1234deadbeef
struct AA
{
unsigned int ax;
unsigned long ay;
unsigned long az;
};
struct SA
{
int p;
struct AA arr[2];
short ee;
};
struct ZA
{
struct SA s;
short q;
};
struct AB
{
unsigned int bx;
unsigned long by;
unsigned long bz;
};
struct SB
{
int p;
struct AB arr[2];
short ee;
};
struct ZB
{
struct SB s;
short q;
};
void __attribute__((noinline))
getb (struct SB *d, struct ZB *p)
{
struct SB tmp = p->s;
*d = tmp;
}

diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
@@ -985,18 +985,101 @@ create_access (tree expr, gimple *stmt, bool write)
return access;
}
/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
ARRAY_TYPE with fields that are either of gimple register types (excluding
bit-fields) or (recursively) scalarizable types. CONST_DECL must be true if
we are considering a decl from constant pool. If it is false, char arrays
will be refused. */
/* Given an array type TYPE, extract the element size into *EL_SIZE, the
minimum index into *IDX and the maximum index into *MAX so that the caller
can iterate over all elements, and return true.  If the array is known to
be zero-length, return false instead. */
static bool
scalarizable_type_p (tree type, bool const_decl)
prepare_iteration_over_array_elts (tree type, HOST_WIDE_INT *el_size,
offset_int *idx, offset_int *max)
{
tree elem_size = TYPE_SIZE (TREE_TYPE (type));
gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
*el_size = tree_to_shwi (elem_size);
gcc_assert (*el_size > 0);
tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
/* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */
if (!maxidx)
return false;
gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
tree domain = TYPE_DOMAIN (type);
/* MINIDX and MAXIDX are inclusive, and must be interpreted in
DOMAIN (e.g. signed int, whereas min/max may be size_int). */
*idx = wi::to_offset (minidx);
*max = wi::to_offset (maxidx);
if (!TYPE_UNSIGNED (domain))
{
*idx = wi::sext (*idx, TYPE_PRECISION (domain));
*max = wi::sext (*max, TYPE_PRECISION (domain));
}
return true;
}
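
/* For illustration (assuming a 32-bit int): for int a[5], whose domain is
[0, 4], this sets *EL_SIZE to 32 bits, *IDX to 0 and *MAX to 4 and returns
true; for a zero-length array lacking a TYPE_MAX_VALUE it returns false
and the caller does not iterate at all. */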
/* A structure used to collect padding information while walking a type and
to hold the result of the collection. */
class sra_padding_collecting
{
public:
/* Given that there won't be any data until at least OFFSET, add an
appropriate entry to the list of paddings or extend the last one. */
void record_padding (HOST_WIDE_INT offset);
/* Vector of pairs describing contiguous pieces of padding, each pair
consisting of offset and length. */
auto_vec<std::pair<HOST_WIDE_INT, HOST_WIDE_INT>, 10> m_padding;
/* Offset just past the last seen actual bit of data; any gap between it
and the start of the next piece of data is padding. */
HOST_WIDE_INT m_data_until = 0;
};
/* Given that there won't be any data until at least OFFSET, add an appropriate
entry to the list of paddings or extend the last one. */
void sra_padding_collecting::record_padding (HOST_WIDE_INT offset)
{
if (offset > m_data_until)
{
HOST_WIDE_INT psz = offset - m_data_until;
if (!m_padding.is_empty ()
&& ((m_padding[m_padding.length () - 1].first
+ m_padding[m_padding.length () - 1].second) == offset))
m_padding[m_padding.length () - 1].second += psz;
else
m_padding.safe_push (std::make_pair (m_data_until, psz));
}
}
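
/* For illustration (offsets in bits, assuming an lp64 layout; struct S is a
made-up example): for struct S { int a; long b; }, the walk in
totally_scalarizable_type_p below does, in order:

record_padding (0);    // field a starts at bit 0: no gap, no entry
m_data_until = 32;     // data of a ends at bit 32
record_padding (64);   // field b starts at bit 64: push the pair (32, 32)
m_data_until = 128;    // data of b ends at bit 128
record_padding (128);  // at TYPE_SIZE: no trailing gap, no entry

leaving m_padding holding the single pair (32, 32). */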
/* Return true iff TYPE is totally scalarizable - i.e. a RECORD_TYPE or
fixed-length ARRAY_TYPE with fields that are either of gimple register types
(excluding bit-fields) or (recursively) scalarizable types. CONST_DECL must
be true if we are considering a decl from constant pool. If it is false,
char arrays will be refused.
TOTAL_OFFSET is the offset of TYPE within any outer type that is being
examined.
If PC is non-NULL, collect padding information into the vector within the
structure.  The information is however only complete if the function
returns true and the type does not contain any padding at its end. */
static bool
totally_scalarizable_type_p (tree type, bool const_decl,
HOST_WIDE_INT total_offset,
sra_padding_collecting *pc)
{
if (is_gimple_reg_type (type))
return true;
{
if (pc)
{
pc->record_padding (total_offset);
pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
}
return true;
}
if (type_contains_placeholder_p (type))
return false;
@@ -1011,6 +1094,8 @@ scalarizable_type_p (tree type, bool const_decl)
{
tree ft = TREE_TYPE (fld);
if (!DECL_SIZE (fld))
return false;
if (zerop (DECL_SIZE (fld)))
continue;
@@ -1025,7 +1110,8 @@ scalarizable_type_p (tree type, bool const_decl)
if (DECL_BIT_FIELD (fld))
return false;
if (!scalarizable_type_p (ft, const_decl))
if (!totally_scalarizable_type_p (ft, const_decl, total_offset + pos,
pc))
return false;
}
@@ -1054,9 +1140,35 @@ scalarizable_type_p (tree type, bool const_decl)
/* Variable-length array, do not allow scalarization. */
return false;
unsigned old_padding_len = 0;
if (pc)
old_padding_len = pc->m_padding.length ();
tree elem = TREE_TYPE (type);
if (!scalarizable_type_p (elem, const_decl))
if (!totally_scalarizable_type_p (elem, const_decl, total_offset, pc))
return false;
if (pc)
{
unsigned new_padding_len = pc->m_padding.length ();
HOST_WIDE_INT el_size;
offset_int idx, max;
if (!prepare_iteration_over_array_elts (type, &el_size, &idx, &max))
return true;
pc->record_padding (total_offset + el_size);
++idx;
for (HOST_WIDE_INT pos = total_offset + el_size;
idx <= max;
pos += el_size, ++idx)
{
for (unsigned i = old_padding_len; i < new_padding_len; i++)
{
HOST_WIDE_INT pp
= pos + pc->m_padding[i].first - total_offset;
HOST_WIDE_INT psz = pc->m_padding[i].second;
pc->m_padding.safe_push (std::make_pair (pp, psz));
}
}
pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
}
return true;
}
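
/* To make the replication above concrete (on lp64, with the element type
borrowed from the pr113359-3 testcase): for struct AA { unsigned int ax;
unsigned long ay; unsigned long az; } (192 bits, one padding pair
(32, 32)), scalarizing AA arr[2] first collects (32, 32) for element zero
and the loop above then replays that entry shifted by one element size,
pushing (224, 32), so the padding of every element ends up in the
vector. */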
default:
@@ -3540,28 +3652,12 @@ totally_scalarize_subtree (struct access *root)
case ARRAY_TYPE:
{
tree elemtype = TREE_TYPE (root->type);
tree elem_size = TYPE_SIZE (elemtype);
gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
HOST_WIDE_INT el_size = tree_to_shwi (elem_size);
gcc_assert (el_size > 0);
HOST_WIDE_INT el_size;
offset_int idx, max;
if (!prepare_iteration_over_array_elts (root->type, &el_size,
&idx, &max))
break;
tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type));
gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type));
/* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */
if (!maxidx)
goto out;
gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
tree domain = TYPE_DOMAIN (root->type);
/* MINIDX and MAXIDX are inclusive, and must be interpreted in
DOMAIN (e.g. signed int, whereas min/max may be size_int). */
offset_int idx = wi::to_offset (minidx);
offset_int max = wi::to_offset (maxidx);
if (!TYPE_UNSIGNED (domain))
{
idx = wi::sext (idx, TYPE_PRECISION (domain));
max = wi::sext (max, TYPE_PRECISION (domain));
}
for (HOST_WIDE_INT pos = root->offset;
idx <= max;
pos += el_size, ++idx)
@@ -3587,7 +3683,8 @@ totally_scalarize_subtree (struct access *root)
? &last_seen_sibling->next_sibling
: &root->first_child);
tree nref = build4 (ARRAY_REF, elemtype, root->expr,
wide_int_to_tree (domain, idx),
wide_int_to_tree (TYPE_DOMAIN (root->type),
idx),
NULL_TREE, NULL_TREE);
struct access *new_child
= create_total_access_and_reshape (root, pos, el_size, elemtype,
@@ -3605,11 +3702,34 @@ totally_scalarize_subtree (struct access *root)
default:
gcc_unreachable ();
}
out:
return true;
}
/* Get the total scalarization size limit in the current function. */
unsigned HOST_WIDE_INT
sra_get_max_scalarization_size (void)
{
bool optimize_speed_p = !optimize_function_for_size_p (cfun);
/* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
fall back to a target default. */
unsigned HOST_WIDE_INT max_scalarization_size
= get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
if (optimize_speed_p)
{
if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
max_scalarization_size = param_sra_max_scalarization_size_speed;
}
else
{
if (OPTION_SET_P (param_sra_max_scalarization_size_size))
max_scalarization_size = param_sra_max_scalarization_size_size;
}
max_scalarization_size *= BITS_PER_UNIT;
return max_scalarization_size;
}
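
/* For example, in a function optimized for speed compiled with
--param sra-max-scalarization-size-Ospeed=32, this returns
32 * BITS_PER_UNIT, i.e. 256 bits on typical targets; without the param
the move-ratio based target default applies. */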
/* Go through all accesses collected throughout the (intraprocedural) analysis
stage, exclude overlapping ones, identify representatives and build trees
out of them, making decisions about scalarization on the way. Return true
@@ -3637,24 +3757,8 @@ analyze_all_variable_accesses (void)
propagate_all_subaccesses ();
bool optimize_speed_p = !optimize_function_for_size_p (cfun);
/* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
fall back to a target default. */
unsigned HOST_WIDE_INT max_scalarization_size
= get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
if (optimize_speed_p)
{
if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
max_scalarization_size = param_sra_max_scalarization_size_speed;
}
else
{
if (OPTION_SET_P (param_sra_max_scalarization_size_size))
max_scalarization_size = param_sra_max_scalarization_size_size;
}
max_scalarization_size *= BITS_PER_UNIT;
= sra_get_max_scalarization_size ();
EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
if (bitmap_bit_p (should_scalarize_away_bitmap, i)
&& !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
@@ -3679,7 +3783,9 @@ analyze_all_variable_accesses (void)
access;
access = access->next_grp)
if (!can_totally_scalarize_forest_p (access)
|| !scalarizable_type_p (access->type, constant_decl_p (var)))
|| !totally_scalarizable_type_p (access->type,
constant_decl_p (var),
0, nullptr))
{
all_types_ok = false;
break;
@@ -5100,3 +5206,45 @@ make_pass_sra (gcc::context *ctxt)
{
return new pass_sra (ctxt);
}
/* If TYPE cannot be totally scalarized, return false.  Otherwise return true
and push to the vector within PC the offsets and lengths of all padding in
the type as total scalarization would encounter it. */
static bool
check_ts_and_push_padding_to_vec (tree type, sra_padding_collecting *pc)
{
if (!totally_scalarizable_type_p (type, true /* optimistic value */,
0, pc))
return false;
pc->record_padding (tree_to_shwi (TYPE_SIZE (type)));
return true;
}
/* Given two types on the two sides of an assignment, return true either if
any one of them cannot be totally scalarized (in which case the copy stays
a block copy that also moves padding) or if total scalarization of both
would skip exactly the same padding bits.  Otherwise return false. */
bool
sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2)
{
sra_padding_collecting p1;
if (!check_ts_and_push_padding_to_vec (t1, &p1))
return true;
sra_padding_collecting p2;
if (!check_ts_and_push_padding_to_vec (t2, &p2))
return true;
unsigned l = p1.m_padding.length ();
if (l != p2.m_padding.length ())
return false;
for (unsigned i = 0; i < l; i++)
if (p1.m_padding[i].first != p2.m_padding[i].first
|| p1.m_padding[i].second != p2.m_padding[i].second)
return false;
return true;
}
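
/* For instance, the two record types from the new pr113359-2 testcase,
struct SA { unsigned int ax; unsigned long ay; unsigned long az; } and
struct SB { unsigned int bx; unsigned long by; unsigned long bz; }, both
yield the single padding pair (32, 32) on lp64, so this returns true and
ICF may keep treating copies of the two types as equivalent. */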

diff --git a/gcc/tree-sra.h b/gcc/tree-sra.h
@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
bool type_internals_preclude_sra_p (tree type, const char **msg);
unsigned HOST_WIDE_INT sra_get_max_scalarization_size (void);
bool sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2);
/* Return true iff TYPE is stdarg va_list type (which early SRA and IPA-SRA
should leave alone). */