cmd/compile/internal/ssa: on PPC64, try combining CLRLSLDI and SRDconst into RLWINM

This provides a small performance bump to crc64 as measured on ppc64le/power10:

name              old time/op    new time/op    delta
Crc64/ISO64KB       49.6µs ± 0%    46.6µs ± 0%  -6.18%
Crc64/ISO4KB        3.16µs ± 0%    2.97µs ± 0%  -5.83%
Crc64/ISO1KB         840ns ± 0%     794ns ± 0%  -5.46%
Crc64/ECMA64KB      49.6µs ± 0%    46.5µs ± 0%  -6.20%
Crc64/Random64KB    53.1µs ± 0%    49.9µs ± 0%  -6.04%
Crc64/Random16KB    15.9µs ± 1%    15.0µs ± 0%  -5.73%

Change-Id: I302b5431c7dc46dfd2d211545c483bdcdfe011f1
Cq-Include-Trybots: luci.golang.try:gotip-linux-ppc64_power10,gotip-linux-ppc64_power8,gotip-linux-ppc64le_power8,gotip-linux-ppc64le_power9,gotip-linux-ppc64le_power10
Reviewed-on: https://go-review.googlesource.com/c/go/+/581937
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
changes/37/581937/4
Paul E. Murphy 2024-04-26 09:26:52 -05:00 committed by Paul Murphy
parent 99fc317948
commit 7994da4cc1
4 changed files with 66 additions and 1 deletions

View File

@ -158,6 +158,7 @@
// Merge shift right + shift left and clear left (e.g for a table lookup)
(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
(CLRLSLDI [c] (SRDconst [s] x)) && mergePPC64ClrlsldiSrd(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)

View File

@ -1589,7 +1589,7 @@ func mergePPC64AndSrwi(m, s int64) int64 {
return encodePPC64RotateMask((32-s)&31, mask, 32)
}
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
@ -1609,6 +1609,31 @@ func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}
// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite mask to apply after the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
r_1 := 64 - srd
r_2 := GetPPC64Shiftsh(sld)
r_3 := (r_1 + r_2) & 63 // This can wrap.
if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
return 0
}
// This combine only works when selecting and shifting the lower 32 bits.
v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
if v1&mask_3 != 0 {
return 0
}
return encodePPC64RotateMask(int64(r_3-32), int64(mask_3), 32)
}
// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {

View File

@ -4628,6 +4628,24 @@ func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool {
v.AddArg(x)
return true
}
// match: (CLRLSLDI [c] (SRDconst [s] x))
// cond: mergePPC64ClrlsldiSrd(int64(c),s) != 0
// result: (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpPPC64SRDconst {
break
}
s := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(mergePPC64ClrlsldiSrd(int64(c), s) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrd(int64(c), s))
v.AddArg(x)
return true
}
// match: (CLRLSLDI [c] i:(RLWINM [s] x))
// cond: mergePPC64ClrlsldiRlwinm(c,s) != 0
// result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)

View File

@ -453,6 +453,27 @@ func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
b[2] = b[v>>25]
}
func checkMergedShifts64(a [256]uint32, b [256]uint64, v uint64) {
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
a[0] = a[uint8(v>>24)]
// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
a[1] = a[uint8(v>>25)]
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]9, R[0-9]+, [$]23, [$]29, R[0-9]+"
a[2] = a[v>>25&0x7F]
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]3, R[0-9]+, [$]29, [$]29, R[0-9]+"
a[3] = a[(v>>31)&0x01]
// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
a[4] = a[(v>>30)&0x07]
// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
a[5] = a[(v>>32)&0x01]
// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
a[5] = a[(v>>34)&0x03]
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]12, R[0-9]+, [$]21, [$]28, R[0-9]+"
b[0] = b[uint8(v>>23)]
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
b[1] = b[(v>>20)&0xFF]
}
// 128 bit shifts
func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) {