Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8223020

aarch64: expand minI_rReg and maxI_rReg patterns into separate instructions

    XMLWordPrintable

    Details

    • Subcomponent:
    • Resolved In Build:
      b19
    • CPU:
      aarch64
    • OS:
      generic

      Backports

        Description

        Currently, each minI_rReg/maxI_rReg pattern emits a fixed pair of instructions: cmpw + cselw.
        Because the two instructions are always emitted together as a single unit, the GCM (Global Code Motion)
        phase is not able to schedule them independently.

        Example test case:

        public class BCE { // Reproducer: repeatedly sums index ranges of a 32-element array, including ranges that run out of bounds.

            final static int MAX = 1024 * 16; // 16384 outer-loop iterations; presumably enough to get addAll compiled by C2 -- confirm
            private static int total = 0; // running sum; only addAll writes to it
            private static final int[] numbers = new int[32]; // never written here, so every element keeps its default value 0

            public static void main(String[] args) throws Exception {
                for (int i = 0; i < MAX; i++) {
                    try { addAll(-16, 16); } catch (Exception e) {} // first access is numbers[-16], which throws; exception deliberately ignored
                    try { addAll(0, 32); } catch (Exception e) {} // indices 0..31: fully in bounds, no exception
                    try { addAll(16, 48); } catch (Exception e) {} // sums indices 16..31, then numbers[32] throws; exception deliberately ignored
                }

                Thread.sleep(4000); // NOTE(review): purpose not stated; presumably leaves time for the JIT output to be emitted -- confirm
                System.out.println("total = " + total); // total stays 0 because every element of numbers is 0
            }

            public static void addAll(int x, int y) { // adds numbers[x] .. numbers[y-1] to total; x and y are not validated
                for (int i = x; i < y; i++) { // half-open range [x, y); an out-of-range i makes the array access below throw
                    total += numbers[i];
                }
            }

        }

        $ java -XX:-TieredCompilation -XX:CompileCommand=compileonly,BCE.addAll -XX:+PrintAssembly BCE

        For the BCE.addAll method, we see the following C2 JIT-compiled code snippet:

          # {method} {0x0000ffff55d67610} 'addAll' '(II)V' in 'BCE'
          # parm0: c_rarg1 = int
          # parm1: c_rarg2 = int
          # [sp+0x20] (sp of caller)
         ;; N1: # B1 <- B16 B14 Freq: 1

         ;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1

          0x0000ffffa34148c0: nop
          0x0000ffffa34148c4: sub x9, sp, #0x19, lsl #12
          0x0000ffffa34148c8: str xzr, [x9]
          0x0000ffffa34148cc: sub sp, sp, #0x20
          0x0000ffffa34148d0: stp x29, x30, [sp,#16] ;*synchronization entry
                                                                    ; - BCE::addAll@-1 (line 19)

          0x0000ffffa34148d4: cmp w1, w2
          0x0000ffffa34148d8: b.ge 0x0000ffffa34149f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                                    ; - BCE::addAll@4 (line 19)

         ;; B2: # B3 <- B1 Freq: 0.98039

         ;; 0x280A108
          0x0000ffffa34148dc: mov x10, #0x0 // #0
                                                                    ; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
          0x0000ffffa34148e0: movk x10, #0x1e0, lsl #16
          0x0000ffffa34148e4: movk x10, #0x1, lsl #32
          0x0000ffffa34148e8: ldr w16, [x10,#16]
          0x0000ffffa34148ec: add w10, w1, #0x1
          0x0000ffffa34148f0: mov w12, wzr <========
          0x0000ffffa34148f4: cmp w10, w12 <========
          0x0000ffffa34148f8: csel w10, w10, w12, gt <========
          0x0000ffffa34148fc: cmp w10, w2 <========
          0x0000ffffa3414900: csel w10, w10, w2, lt <========
         ;; 0x280A100
          0x0000ffffa3414904: mov x18, #0xcde0 // #52704
                                                                    ; {oop([I{0x0000000101c4cde0})}
          0x0000ffffa3414908: movk x18, #0x1c4, lsl #16
          0x0000ffffa341490c: movk x18, #0x1, lsl #32
         ;; 0x280A0F8
          0x0000ffffa3414910: mov x3, #0xc428 // #50216
                                                                    ; {oop(a 'java/lang/Class'{0x0000000101c4c428} = 'BCE')}
          0x0000ffffa3414914: movk x3, #0x1c4, lsl #16
          0x0000ffffa3414918: movk x3, #0x1, lsl #32 ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}


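        After the minI_rReg and maxI_rReg patterns are expanded into two separate instructions, we see the
        following (note that the second cmp/csel pair is no longer adjacent: the constant-load mov/movk
        instructions are now scheduled between the cmp and its csel):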
          # {method} {0x0000ffff4eb9c610} 'addAll' '(II)V' in 'BCE'
          # parm0: c_rarg1 = int
          # parm1: c_rarg2 = int
          # [sp+0x20] (sp of caller)
         ;; N1: # B1 <- B16 B14 Freq: 1

         ;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1

          0x0000ffff9c249940: nop
          0x0000ffff9c249944: sub x9, sp, #0x19, lsl #12
          0x0000ffff9c249948: str xzr, [x9]
          0x0000ffff9c24994c: sub sp, sp, #0x20
          0x0000ffff9c249950: stp x29, x30, [sp,#16] ;*synchronization entry
                                                                    ; - BCE::addAll@-1 (line 19)

          0x0000ffff9c249954: cmp w1, w2
          0x0000ffff9c249958: b.ge 0x0000ffff9c249a74 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                                    ; - BCE::addAll@4 (line 19)

         ;; B2: # B3 <- B1 Freq: 0.980392

         ;; 0x3359E588
          0x0000ffff9c24995c: mov x10, #0x0 // #0
                                                                    ; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
          0x0000ffff9c249960: movk x10, #0x1e0, lsl #16
          0x0000ffff9c249964: movk x10, #0x1, lsl #32
          0x0000ffff9c249968: add w12, w1, #0x1
          0x0000ffff9c24996c: ldr w11, [x10,#16]
          0x0000ffff9c249970: mov w10, wzr <========
          0x0000ffff9c249974: cmp w12, w10 <========
          0x0000ffff9c249978: csel w10, w12, w10, gt <========
          0x0000ffff9c24997c: cmp w10, w2 <========
         ;; 0x3359E580
          0x0000ffff9c249980: mov x16, #0xcda8 // #52648
                                                                    ; {oop([I{0x0000000101c4cda8})}
          0x0000ffff9c249984: movk x16, #0x1c4, lsl #16
          0x0000ffff9c249988: movk x16, #0x1, lsl #32
         ;; 0x3359E578
          0x0000ffff9c24998c: mov x18, #0xc3f0 // #50160
                                                                    ; {oop(a 'java/lang/Class'{0x0000000101c4c3f0} = 'BCE')}
          0x0000ffff9c249990: movk x18, #0x1c4, lsl #16
          0x0000ffff9c249994: movk x18, #0x1, lsl #32
          0x0000ffff9c249998: csel w10, w10, w2, lt <======== ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}

          Attachments

            Issue Links

              Activity

                People

                Assignee:
                fyang Fei Yang
                Reporter:
                fyang Fei Yang
                Votes:
                0 Vote for this issue
                Watchers:
                3 Start watching this issue

                  Dates

                  Created:
                  Updated:
                  Resolved: