Details
Description
I observed code sequences like the following in SPECjbb2005 and other benchmarks:
61976 0x0000007f7e17ff44: mov w13, wzr
61977 0x0000007f7e17ff48: cmp w12, w13
61978 0x0000007f7e17ff4c: csel w11, w12, w13, gt ;*iinc {reexecute=0 rethrow=0 return_oop=0}
61979 ; - jdk.internal.org.objectweb.asm.Type::getArgumentsAndReturnSizes@103 (line 451)
......
123349 0x0000007f7e1db660: mov w16, wzr
123350 0x0000007f7e1db664: sub w10, w19, w23 ;*iload {reexecute=0 rethrow=0 return_oop=0}
123351 ; - java.math.MutableBigInteger::divideMagnitude@182 (line 1503)
123352
123353 0x0000007f7e1db668: cmp w15, w16
123354 0x0000007f7e1db66c: csel w18, w15, w16, gt
123355 0x0000007f7e1db670: mov w25, wzr ;*iastore {reexecute=0 rethrow=0 return_oop=0}
123356 ; - java.math.MutableBigInteger::divideMagnitude@221 (line 1506)
......
187167 0x0000007f7e23a6f0: mov w12, wzr
187168 0x0000007f7e23a6f4: cmp w11, w12
187169 0x0000007f7e23a6f8: csel w10, w11, w12, gt
......
17114 0x0000007f7e124058: orr w14, wzr, #0x1
17115
17116 0x0000007f7e12405c: mov w10, wzr
17117 0x0000007f7e124060: cmp w13, w14
17118 0x0000007f7e124064: csel w12, w13, w14, gt
......
61072 0x0000007f7e179450: orr w11, wzr, #0x1
61073 0x0000007f7e179454: cmp w10, w11
61074 0x0000007f7e179458: csel w12, w10, w11, gt
The AArch64 port has the csel/csinc/csinv instructions, which can take the zero register as an operand; using them in these cases eliminates the extra mov (or orr) instruction that materializes the constant.
For example:
min(x, 1) which becomes
cmp x, 0
csinc x, x, zr, le
min(x, -1) which becomes
cmp x, 0
csinv x, x, zr, lt
max(x, 1) which becomes
cmp x, 0
csinc x, x, zr, gt
max(x, -1) which becomes
cmp x, 0
csinv x, x, zr, ge
61976 0x0000007f7e17ff44: mov w13, wzr
61977 0x0000007f7e17ff48: cmp w12, w13
61978 0x0000007f7e17ff4c: csel w11, w12, w13, gt ;*iinc {reexecute=0 rethrow=0 return_oop=0}
61979 ; - jdk.internal.org.objectweb.asm.Type::getArgumentsAndReturnSizes@103 (line 451)
......
123349 0x0000007f7e1db660: mov w16, wzr
123350 0x0000007f7e1db664: sub w10, w19, w23 ;*iload {reexecute=0 rethrow=0 return_oop=0}
123351 ; - java.math.MutableBigInteger::divideMagnitude@182 (line 1503)
123352
123353 0x0000007f7e1db668: cmp w15, w16
123354 0x0000007f7e1db66c: csel w18, w15, w16, gt
123355 0x0000007f7e1db670: mov w25, wzr ;*iastore {reexecute=0 rethrow=0 return_oop=0}
123356 ; - java.math.MutableBigInteger::divideMagnitude@221 (line 1506)
......
187167 0x0000007f7e23a6f0: mov w12, wzr
187168 0x0000007f7e23a6f4: cmp w11, w12
187169 0x0000007f7e23a6f8: csel w10, w11, w12, gt
......
17114 0x0000007f7e124058: orr w14, wzr, #0x1
17115
17116 0x0000007f7e12405c: mov w10, wzr
17117 0x0000007f7e124060: cmp w13, w14
17118 0x0000007f7e124064: csel w12, w13, w14, gt
......
61072 0x0000007f7e179450: orr w11, wzr, #0x1
61073 0x0000007f7e179454: cmp w10, w11
61074 0x0000007f7e179458: csel w12, w10, w11, gt
The AArch64 port has the csel/csinc/csinv instructions, which can take the zero register as an operand; using them in these cases eliminates the extra mov (or orr) instruction that materializes the constant.
For example:
min(x, 1) which becomes
cmp x, 0
csinc x, x, zr, le
min(x, -1) which becomes
cmp x, 0
csinv x, x, zr, lt
max(x, 1) which becomes
cmp x, 0
csinc x, x, zr, gt
max(x, -1) which becomes
cmp x, 0
csinv x, x, zr, ge