8029940: PPC64 (part 122): C2 compiler port

Reviewed-by: kvn
Goetz Lindenmaier 2013-12-11 00:06:11 +01:00
parent c50c083f83
commit 7d56518671
40 changed files with 13274 additions and 593 deletions

View File

@ -41,13 +41,11 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
ifeq ("${Platform_arch_model}", "${Platform_arch}")
SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad)
else
SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad)
endif
EXEC = $(OUTDIR)/adlc

View File

@ -98,7 +98,17 @@ class Argument VALUE_OBJ_CLASS_SPEC {
// Only 8 registers may contain integer parameters.
n_register_parameters = 8,
// Can have up to 8 floating registers.
n_float_register_parameters = 8
n_float_register_parameters = 8,
// PPC C calling conventions.
// The first eight arguments are passed in int regs if they are int.
n_int_register_parameters_c = 8,
// The first thirteen float arguments are passed in float regs.
n_float_register_parameters_c = 13,
// Only the first 8 parameters are not placed on the stack. Aix disassembly
// shows that xlC places all float args after argument 8 on the stack AND
// in a register. This is not documented, but we follow this convention, too.
n_regs_not_on_stack_c = 8,
};
// creation
Argument(int number) : _number(number) {}
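A standalone sketch (plain C++, not part of the port; the helper and the r3..r10 / f1..f13 register names are assumptions) of how the convention described above places a C argument:

#include <cstdio>

// i    : 0-based position among all parameters
// fidx : 0-based position among the float parameters (only meaningful if is_float)
static void print_placement(int i, int fidx, bool is_float) {
  const bool on_stack = (i >= 8);                    // only the first 8 parameters stay off the stack
  if (is_float && fidx < 13) {
    printf("arg %d: f%d%s\n", i, fidx + 1, on_stack ? " and a stack slot" : "");
  } else if (!is_float && i < 8) {
    printf("arg %d: r%d\n", i, i + 3);               // r3..r10 hold the first eight int args
  } else {
    printf("arg %d: stack slot\n", i);
  }
}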
@ -662,6 +672,14 @@ class Assembler : public AbstractAssembler {
bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken,
};
// Elemental Memory Barriers (>=Power 8)
enum Elemental_Membar_mask_bits {
StoreStore = 1 << 0,
StoreLoad = 1 << 1,
LoadStore = 1 << 2,
LoadLoad = 1 << 3
};
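// Hypothetical usage (not from this commit): an acquire-style elemental barrier
// ordering earlier loads against later loads and stores would be
//   elemental_membar(LoadLoad | LoadStore);  // mask 0b1100, encoded into sync via e1215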
// Branch prediction hints.
inline static int add_bhint_to_boint(const int bhint, const int boint) {
switch (boint) {
@ -753,17 +771,6 @@ class Assembler : public AbstractAssembler {
enum Predict { pt = 1, pn = 0 }; // pt = predict taken
enum Membar_mask_bits { // page 184, v9
StoreStore = 1 << 3,
LoadStore = 1 << 2,
StoreLoad = 1 << 1,
LoadLoad = 1 << 0,
Sync = 1 << 6,
MemIssue = 1 << 5,
Lookaside = 1 << 4
};
// instruction must start at passed address
static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
@ -875,19 +882,20 @@ class Assembler : public AbstractAssembler {
#define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
// Extract instruction fields from instruction words.
public:
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
// Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0.
// Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0.
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
#define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
#define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
@ -925,6 +933,7 @@ class Assembler : public AbstractAssembler {
static int l10( int x) { return opp_u_field(x, 10, 10); }
static int l15( int x) { return opp_u_field(x, 15, 15); }
static int l910( int x) { return opp_u_field(x, 10, 9); }
static int e1215( int x) { return opp_u_field(x, 15, 12); }
static int lev( int x) { return opp_u_field(x, 26, 20); }
static int li( int x) { return opp_s_field(x, 29, 6); }
static int lk( int x) { return opp_u_field(x, 31, 31); }
@ -960,13 +969,13 @@ class Assembler : public AbstractAssembler {
static int sr( int x) { return opp_u_field(x, 15, 12); }
static int tbr( int x) { return opp_u_field(x, 20, 11); }
static int th( int x) { return opp_u_field(x, 10, 7); }
static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); }
static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); }
static int thct( int x) { assert((x&8) == 0, "must be valid cache specification"); return th(x); }
static int thds( int x) { assert((x&8) == 8, "must be valid stream specification"); return th(x); }
static int to( int x) { return opp_u_field(x, 10, 6); }
static int u( int x) { return opp_u_field(x, 19, 16); }
static int ui( int x) { return opp_u_field(x, 31, 16); }
// support vector instructions for >= Power6
// Support vector instructions for >= Power6.
static int vra( int x) { return opp_u_field(x, 15, 11); }
static int vrb( int x) { return opp_u_field(x, 20, 16); }
static int vrc( int x) { return opp_u_field(x, 25, 21); }
@ -1090,8 +1099,8 @@ class Assembler : public AbstractAssembler {
inline void subfic( Register d, Register a, int si16);
inline void add( Register d, Register a, Register b);
inline void add_( Register d, Register a, Register b);
inline void subf( Register d, Register a, Register b);
inline void sub( Register d, Register a, Register b);
inline void subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec.
inline void sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability.
inline void subf_( Register d, Register a, Register b);
inline void addc( Register d, Register a, Register b);
inline void addc_( Register d, Register a, Register b);
@ -1204,7 +1213,7 @@ class Assembler : public AbstractAssembler {
}
// endgroup opcode for Power6
static bool is_endgroup(int x) {
return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
}
@ -1227,9 +1236,13 @@ class Assembler : public AbstractAssembler {
inline void cmpld( ConditionRegister crx, Register a, Register b);
inline void isel( Register d, Register a, Register b, int bc);
// Convenient version which takes: Condition register, Condition code and invert flag. Omit b to keep old value.
inline void isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg);
// Set d = 0 if (cr.cc) equals 1, otherwise b.
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
void andi( Register a, Register s, int ui16); // optimized version
void andi( Register a, Register s, int ui16); // optimized version
inline void andi_( Register a, Register s, int ui16);
inline void andis_( Register a, Register s, int ui16);
inline void ori( Register a, Register s, int ui16);
@ -1553,10 +1566,7 @@ class Assembler : public AbstractAssembler {
inline void ptesync();
inline void eieio();
inline void isync();
inline void release();
inline void acquire();
inline void fence();
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
@ -1938,7 +1948,7 @@ class Assembler : public AbstractAssembler {
inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
// Load a 64 bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16 bit immediate offset. This is useful if the offset can be encoded in
// a succeeding instruction.
int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);

View File

@ -224,8 +224,12 @@ inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) {
inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); }
// testbit with condition register.
inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) {
Assembler::rldicr(a, s, 63-ui6, 0);
Assembler::cmpdi(cr, a, 0);
if (cr == CCR0) {
Assembler::rldicr_(a, s, 63-ui6, 0);
} else {
Assembler::rldicr(a, s, 63-ui6, 0);
Assembler::cmpdi(cr, a, 0);
}
}
// rotate instructions
@ -423,6 +427,27 @@ inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE
inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
// Conditional move (>= Power7)
inline void Assembler::isel(Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b) {
if (b == noreg) {
b = d; // Can be omitted if old value should be kept in "else" case.
}
Register first = a;
Register second = b;
if (inv) {
first = b;
second = a; // exchange
}
assert(first != R0, "r0 not allowed");
isel(d, first, second, bi0(cr, cc));
}
inline void Assembler::isel_0(Register d, ConditionRegister cr, Condition cc, Register b) {
if (b == noreg) {
b = d; // Can be omitted if old value should be kept in "else" case.
}
isel(d, R0, b, bi0(cr, cc));
}
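// Usage sketch (hypothetical registers; assumes the usual Condition names):
//   isel(R5, CCR0, Assembler::equal, /*inv=*/false, R6, R7); // R5 = CR0.eq ? R6 : R7
//   isel_0(R5, CCR0, Assembler::less);                       // R5 = CR0.lt ? 0 : R5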
// PPC 2, section 3.2.1 Instruction Cache Instructions
inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); }
// PPC 2, section 3.2.2 Data Cache Instructions
@ -445,10 +470,7 @@ inline void Assembler::lwsync() { Assembler::sync(1); }
inline void Assembler::ptesync() { Assembler::sync(2); }
inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
inline void Assembler::release() { Assembler::lwsync(); }
inline void Assembler::acquire() { Assembler::lwsync(); }
inline void Assembler::fence() { Assembler::sync(); }
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
// atomics
// Use ra0mem to disallow R0 as base.
@ -767,7 +789,6 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp);
}

View File

@ -100,6 +100,7 @@ public:
#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
((VMJavaVal64*)(addr))->d)
#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_PP

View File

@ -33,7 +33,7 @@ class Bytes: AllStatic {
// Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
// PowerPC needs to check for alignment.
// can I count on address always being a pointer to an unsigned char? Yes
// Can I count on address always being a pointer to an unsigned char? Yes.
// Returns true if the byte ordering used by Java is different from the native byte ordering
// of the underlying machine. For example, true for Intel x86, false for Solaris on SPARC.
@ -141,7 +141,6 @@ class Bytes: AllStatic {
}
}
// Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
// (no byte-order reversal is needed since Power CPUs are big-endian oriented).
static inline u2 get_Java_u2(address p) { return get_native_u2(p); }

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C2_GLOBALS_PPC_HPP
#define CPU_PPC_VM_C2_GLOBALS_PPC_HPP
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
// Sets the default values for platform dependent flags used by the server compiler.
// (see c2_globals.hpp).
define_pd_global(bool, BackgroundCompilation, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, true);
define_pd_global(bool, UseOnStackReplacement, true);
define_pd_global(bool, ProfileInterpreter, true);
define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, BackEdgeThreshold, 140000);
define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 28);
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, INTPRESSURE, 25);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
// Peephole and CISC spilling both break the graph, and so make the
// scheduler sick.
define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
// GL:
// Detected a problem with unscaled compressed oops and
// narrow_oop_use_complex_address() == false.
// -Djava.io.tmpdir=./tmp -jar SPECjvm2008.jar -ikv -wt 3 -it 3
// -bt 1 --base compiler.sunflow
// fails in Lower.visitIf->translate->translate->translate and
// throws an unexpected NPE. A load and a store seem to be
// reordered. The Java code reads roughly:
// loc = x.f
// x.f = 0
// NullCheck loc
// While the generated assembly reads:
// x.f = 0
// loc = x.f
// NullCheck loc
define_pd_global(bool, OptoScheduling, false);
define_pd_global(intx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
define_pd_global(intx, ReservedCodeCacheSize, 256*M);
define_pd_global(intx, CodeCacheExpansionSize, 64*K);
// Ergonomics related flags
define_pd_global(uint64_t,MaxRAM, 4ULL*G);
define_pd_global(uintx, CodeCacheMinBlockLength, 4);
define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false);
// Heap related flags
define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
// Ergonomics related flags
define_pd_global(bool, NeverActAsServerClassMachine, false);
#endif // CPU_PPC_VM_C2_GLOBALS_PPC_HPP

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "runtime/globals.hpp"
#include "utilities/debug.hpp"
// processor dependent initialization for ppc
void Compile::pd_compiler2_init() {
// Power7 and later
if (PowerArchitecturePPC64 > 6) {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
FLAG_SET_ERGO(bool, UsePopCountInstruction, true);
}
}
if (PowerArchitecturePPC64 == 6) {
if (FLAG_IS_DEFAULT(InsertEndGroupPPC64)) {
FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
}
}
}

View File

@ -105,10 +105,12 @@ static void copy_conjoint_atomic(T* from, T* to, size_t count) {
}
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
// TODO: contribute optimized version.
copy_conjoint_atomic<jshort>(from, to, count);
}
static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
// TODO: contribute optimized version.
copy_conjoint_atomic<jint>(from, to, count);
}
@ -125,10 +127,12 @@ static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count
}
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
// TODO: contribute optimized version.
pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
}
static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
// TODO: contribute optimized version.
pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
}

View File

@ -1981,8 +1981,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
// Restore R14_state.
__ ld(R14_state, 0, R1_SP);
__ addi(R14_state, R14_state,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
//
// Registers alive

View File

@ -176,13 +176,14 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
Method* method = interpreter_frame_method();
BasicType type = method->result_type();
#ifdef CC_INTERP
if (method->is_native()) {
// Prior to calling into the runtime to notify the method exit the possible
// result value is saved into the interpreter frame.
#ifdef CC_INTERP
interpreterState istate = get_interpreterState();
address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset());
address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset());
#endif
switch (method->result_type()) {
case T_OBJECT:
@ -226,9 +227,6 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
default : ShouldNotReachHere();
}
}
#else
Unimplemented();
#endif
return type;
}

View File

@ -421,7 +421,7 @@
#ifdef CC_INTERP
// Additional interface for interpreter frames:
inline interpreterState get_interpreterState() const;
#endif
#endif // CC_INTERP
// Size of a monitor in bytes.
static int interpreter_frame_monitor_size_in_bytes();
@ -431,7 +431,6 @@
private:
// PPC port: permgen stuff
ConstantPoolCache** interpreter_frame_cpoolcache_addr() const;
public:

View File

@ -78,11 +78,8 @@ inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp)
// can distinguish identity and younger/older relationship. NULL
// represents an invalid (incomparable) frame.
inline intptr_t* frame::id(void) const {
// Use the _unextended_pc as the frame's ID. Because we have no
// adapters, but resized compiled frames, some of the new code
// (e.g. JVMTI) wouldn't work if we return the (current) SP of the
// frame.
return _unextended_sp;
// Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
return _fp;
}
// Return true if this frame is older (less recent activation) than

View File

@ -62,6 +62,13 @@ define_pd_global(uintx, TypeProfileLevel, 0);
// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
\
/* Load poll address from thread. This is used to implement per-thread */ \
/* safepoints on platforms != IA64. */ \
product(bool, LoadPollAddressFromThread, false, \
"Load polling page address from thread object (required for " \
"per-thread safepoints on platforms != IA64)") \
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power7. Default is 0. CPUs newer than Power7 will be " \
@ -88,6 +95,14 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \
"Use static branch prediction hints in CAS operations.") \
product(bool, UseStaticBranchPredictionForUncommonPathsPPC64, false, \
"Use static branch prediction hints for uncommon paths.") \
\
product(bool, UsePower6SchedulerPPC64, false, \
"Use Power6 Scheduler.") \
\
product(bool, InsertEndGroupPPC64, false, \
"Insert EndGroup instructions to optimize for Power6.") \
\
/* Trap based checks. */ \
/* Trap based checks use the ppc trap instructions to check certain */ \
@ -108,5 +123,4 @@ define_pd_global(uintx, TypeProfileLevel, 0);
" Use this to ease debugging.") \
#endif // CPU_PPC_VM_GLOBALS_PPC_HPP

View File

@ -28,17 +28,17 @@
#include "runtime/icache.hpp"
// Use inline assembler to implement icache flush.
int ppc64_flush_icache(address start, int lines, int magic){
int ICache::ppc64_flush_icache(address start, int lines, int magic) {
address end = start + (unsigned int)lines*ICache::line_size;
assert(start <= end, "flush_icache parms");
// store modified cache lines from data cache
for (address a=start; a<end; a+=ICache::line_size) {
for (address a = start; a < end; a += ICache::line_size) {
__asm__ __volatile__(
"dcbst 0, %0 \n"
:
: "r" (a)
: "memory");
"dcbst 0, %0 \n"
:
: "r" (a)
: "memory");
}
// sync instruction
@ -49,20 +49,20 @@ int ppc64_flush_icache(address start, int lines, int magic){
: "memory");
// invalidate respective cache lines in instruction cache
for (address a=start; a<end; a+=ICache::line_size) {
for (address a = start; a < end; a += ICache::line_size) {
__asm__ __volatile__(
"icbi 0, %0 \n"
:
: "r" (a)
: "memory");
"icbi 0, %0 \n"
:
: "r" (a)
: "memory");
}
// discard fetched instructions
__asm__ __volatile__(
"isync \n"
:
:
: "memory");
"isync \n"
:
:
: "memory");
return magic;
}
@ -70,7 +70,7 @@ int ppc64_flush_icache(address start, int lines, int magic){
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
StubCodeMark mark(this, "ICache", "flush_icache_stub");
*flush_icache_stub = (ICache::flush_icache_stub_t)ppc64_flush_icache;
*flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
// First call to flush itself
ICache::invalidate_range((address)(*flush_icache_stub), 0);

View File

@ -30,15 +30,23 @@
// code, part of the processor instruction cache potentially has to be flushed.
class ICache : public AbstractICache {
friend class ICacheStubGenerator;
static int ppc64_flush_icache(address start, int lines, int magic);
public:
enum {
// On PowerPC the cache line size is 32 bytes.
stub_size = 160, // Size of the icache flush stub in bytes.
line_size = 32, // Flush instruction affects 32 bytes.
log2_line_size = 5 // log2(line_size)
// Actually, cache line size is 64, but keeping it as it is to be
// on the safe side on ALL PPC64 implementations.
log2_line_size = 5,
line_size = 1 << log2_line_size
};
// Use default implementation
static void ppc64_flush_icache_bytes(address start, int bytes) {
// Align start address to an icache line boundary and transform
// nbytes to an icache line count.
const uint line_offset = mask_address_bits(start, line_size - 1);
ppc64_flush_icache(start - line_offset, (bytes + line_offset + line_size - 1) >> log2_line_size, 0);
}
};
#endif // CPU_PPC_VM_ICACHE_PPC_HPP
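For reference, a standalone sketch (plain C++, made-up sample values) of the alignment arithmetic used by ppc64_flush_icache_bytes:

#include <cstdint>

static int lines_to_flush(uintptr_t start, int bytes) {
  const int line_size = 32, log2_line_size = 5;            // values from the enum above
  const int line_offset = (int)(start & (line_size - 1));  // distance into the first line
  // Round the offset-adjusted byte count up to whole icache lines.
  return (bytes + line_offset + line_size - 1) >> log2_line_size;
}
// e.g. start = 0x1004, bytes = 40: offset 4, (40 + 4 + 31) >> 5 = 2 lines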

View File

@ -30,13 +30,21 @@
#include "interp_masm_ppc_64.hpp"
#include "interpreter/interpreterRuntime.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Register temp_reg) {
#ifdef CC_INTERP
address exception_entry = StubRoutines::throw_NullPointerException_at_call_entry();
#else
address exception_entry = Interpreter::throw_NullPointerException_entry();
#endif
MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
}
// Lock object
//
// Registers alive
@ -47,7 +55,7 @@
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
monitor, /*check_for_exceptions=*/false);
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
} else {
// template code:
//
@ -69,7 +77,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
const Register tmp = R10_ARG8;
Label done;
Label slow_case;
Label cas_failed, slow_case;
assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
@ -91,7 +99,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// Initialize the box (Must happen before we update the object mark!).
std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
BasicLock::displaced_header_offset_in_bytes(), monitor);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
@ -106,12 +114,14 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
/*compare_value=*/displaced_header, /*exchange_value=*/monitor,
/*where=*/object_mark_addr,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock());
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed);
// If the compare-and-exchange succeeded, then we found an unlocked
// object and we have now locked it.
beq(CCR0, done);
b(done);
bind(cas_failed);
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@ -134,7 +144,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
bne(CCR0, slow_case);
release();
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
BasicLock::displaced_header_offset_in_bytes(), monitor);
b(done);
@ -146,7 +156,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// slow case of monitor enter.
bind(slow_case);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
monitor, /*check_for_exceptions=*/false);
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
// }
bind(done);
@ -160,7 +170,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// which must be initialized with the object to lock.
//
// Throw IllegalMonitorException if object is not locked by current thread.
void InterpreterMacroAssembler::unlock_object(Register monitor) {
void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
if (UseHeavyMonitors) {
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
monitor, /*check_for_exceptions=*/false);
@ -184,9 +194,8 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
const Register object_mark_addr = R9_ARG7;
const Register current_header = R10_ARG8;
Label no_recursive_unlock;
Label free_slot;
Label slow_case;
Label done;
assert_different_registers(object, displaced_header, object_mark_addr, current_header);
@ -194,7 +203,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// The object address from the monitor is in object.
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
biased_locking_exit(CCR0, object, displaced_header, done);
biased_locking_exit(CCR0, object, displaced_header, free_slot);
}
// Test first if we are in the fast recursive case.
@ -203,13 +212,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// If the displaced header is zero, we have a recursive unlock.
cmpdi(CCR0, displaced_header, 0);
bne(CCR0, no_recursive_unlock);
// Release in recursive unlock is not necessary.
// release();
std(displaced_header/*==0!*/, BasicObjectLock::obj_offset_in_bytes(), monitor);
b(done);
bind(no_recursive_unlock);
beq(CCR0, free_slot); // recursive unlock
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
@ -218,7 +221,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// If we still have a lightweight lock, unlock the object and be done.
// The object address from the monitor is in object.
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
if (!UseBiasedLocking) ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
// We have the displaced header in displaced_header. If the lock is still
@ -229,17 +232,11 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
/*current_value=*/current_header,
/*compare_value=*/monitor, /*exchange_value=*/displaced_header,
/*where=*/object_mark_addr,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_release_lock());
bne(CCR0, slow_case);
// Exchange worked, do monitor->set_obj(NULL).
li(R0, 0);
// Must realease earlier (see cmpxchgd above).
// release();
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
b(done);
MacroAssembler::MemBarRel,
MacroAssembler::cmpxchgx_hint_release_lock(),
noreg,
&slow_case);
b(free_slot);
// } else {
// // Slow path.
@ -249,9 +246,17 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// we need to get into the slow case.
bind(slow_case);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
monitor, /*check_for_exceptions=*/false);
monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
// }
Label done;
b(done); // Monitor register may be overwritten! Runtime has already freed the slot.
// Exchange worked, do monitor->set_obj(NULL);
align(32, 12);
bind(free_slot);
li(R0, 0);
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
bind(done);
}
}
@ -375,6 +380,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
/*check_exceptions=*/false);
align(32, 12);
bind(jvmti_post_done);
}
}

View File

@ -37,6 +37,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
public:
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
void null_check_throw(Register a, int offset, Register temp_reg);
// Handy address generation macros
#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method
@ -51,15 +53,16 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Object locking
void lock_object (Register lock_reg, Register obj_reg);
void unlock_object(Register lock_reg);
void unlock_object(Register lock_reg, bool check_for_exceptions = true);
// Debugging
void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos
// support for jvmdi/jvmpi
void notify_method_entry();
void notify_method_exit(bool save_result, TosState state);
void notify_method_exit(bool is_native_method, TosState state);
#ifdef CC_INTERP
// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
// (using parent_frame_resize) and push a new interpreter
// TOP_IJAVA_FRAME (using frame_size).
@ -84,6 +87,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void pop_interpreter_state(bool prev_state_may_be_0);
void restore_prev_state();
#endif
};
#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP

View File

@ -396,18 +396,14 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type
//
Label done;
Label is_false;
address entry = __ pc();
switch (type) {
case T_BOOLEAN:
__ cmpwi(CCR0, R3_RET, 0);
__ beq(CCR0, is_false);
__ li(R3_RET, 1);
__ b(done);
__ bind(is_false);
__ li(R3_RET, 0);
// convert !=0 to 1
__ neg(R0, R3_RET);
__ orr(R0, R3_RET, R0);
__ srwi(R3_RET, R0, 31);
break;
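// (Branch-free: for any 32-bit x, (x | -x) has its sign bit set exactly when
//  x != 0, so the logical shift right by 31 yields 0 or 1.
//  E.g. x = 5: 5 | -5 = 0xFFFFFFFF, >> 31 = 1; x = 0: 0 | 0 = 0, >> 31 = 0.)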
case T_BYTE:
// sign extend 8 bits
@ -478,7 +474,7 @@ address InterpreterGenerator::generate_abstract_entry(void) {
// Push a new C frame and save LR.
__ save_LR_CR(R0);
__ push_frame_abi112_nonvolatiles(0, R11_scratch1);
__ push_frame_abi112(0, R11_scratch1);
// This is not a leaf but we have a JavaFrameAnchor now and we will
// check (create) exceptions afterward so this is ok.
@ -491,8 +487,12 @@ address InterpreterGenerator::generate_abstract_entry(void) {
// Reset JavaFrameAnchor from call_VM_leaf above.
__ reset_last_Java_frame();
#ifdef CC_INTERP
// Return to frame manager, it will handle the pending exception.
__ blr();
#else
Unimplemented();
#endif
return entry;
}
@ -503,16 +503,20 @@ address InterpreterGenerator::generate_accessor_entry(void) {
if(!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods)))
return NULL;
Label Ldone, Lslow_path;
Label Lslow_path, Lacquire;
const Register Rthis = R3_ARG1,
const Register
Rclass_or_obj = R3_ARG1,
Rconst_method = R4_ARG2,
Rcodes = Rconst_method,
Rcpool_cache = R5_ARG3,
Rscratch = R11_scratch1,
Rjvmti_mode = Rscratch,
Roffset = R12_scratch2,
Rflags = R6_ARG4;
Rflags = R6_ARG4,
Rbtable = R7_ARG5;
static address branch_table[number_of_states];
address entry = __ pc();
@ -521,13 +525,9 @@ address InterpreterGenerator::generate_accessor_entry(void) {
// Also check for JVMTI mode
// Check for null obj, take slow path if so.
#ifdef CC_INTERP
__ ld(Rthis, Interpreter::stackElementSize, R17_tos);
#else
Unimplemented()
#endif
__ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
__ lwz(Rjvmti_mode, thread_(interp_only_mode));
__ cmpdi(CCR1, Rthis, 0);
__ cmpdi(CCR1, Rclass_or_obj, 0);
__ cmpwi(CCR0, Rjvmti_mode, 0);
__ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
__ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0
@ -560,58 +560,127 @@ address InterpreterGenerator::generate_accessor_entry(void) {
__ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
__ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);
// Get field type.
// (Rflags>>ConstantPoolCacheEntry::tos_state_shift)&((1<<ConstantPoolCacheEntry::tos_state_bits)-1)
// Following code is from templateTable::getfield_or_static
// Load pointer to branch table
__ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
// Get volatile flag
__ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
// note: sync is needed before volatile load on PPC64
// Check field type
__ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
#ifdef ASSERT
__ ld(R9_ARG7, 0, R1_SP);
__ ld(R10_ARG8, 0, R21_sender_SP);
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
__ asm_assert_eq("backlink", 0x543);
Label LFlagInvalid;
__ cmpldi(CCR0, Rflags, number_of_states);
__ bge(CCR0, LFlagInvalid);
__ ld(R9_ARG7, 0, R1_SP);
__ ld(R10_ARG8, 0, R21_sender_SP);
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
__ asm_assert_eq("backlink", 0x543);
#endif // ASSERT
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
// Load the return value according to field type.
Label Litos, Lltos, Lbtos, Lctos, Lstos;
__ cmpdi(CCR1, Rflags, itos);
__ cmpdi(CCR0, Rflags, ltos);
__ beq(CCR1, Litos);
__ beq(CCR0, Lltos);
__ cmpdi(CCR1, Rflags, btos);
__ cmpdi(CCR0, Rflags, ctos);
__ beq(CCR1, Lbtos);
__ beq(CCR0, Lctos);
__ cmpdi(CCR1, Rflags, stos);
__ beq(CCR1, Lstos);
// Load from branch table and dispatch (volatile case: one instruction ahead)
__ sldi(Rflags, Rflags, LogBytesPerWord);
__ cmpwi(CCR6, Rscratch, 1); // volatile?
__ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
__ ldx(Rbtable, Rbtable, Rflags);
__ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
__ mtctr(Rbtable);
__ bctr();
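// In effect (plain-C sketch, hypothetical names): each tos state has one slot in
// branch_table, and the volatile variant of an entry is the sync placed one
// instruction before the non-volatile entry point:
//   address entry = branch_table[tos_state];
//   if (is_volatile) entry -= BytesPerInstWord;  // back up onto the preceding sync
//   jump to entry;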
#ifdef ASSERT
__ cmpdi(CCR0, Rflags, atos);
__ asm_assert_eq("what type is this?", 0x432);
__ bind(LFlagInvalid);
__ stop("got invalid flag", 0x6541);
bool all_uninitialized = true,
all_initialized = true;
for (int i = 0; i<number_of_states; ++i) {
all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
all_initialized = all_initialized && (branch_table[i] != NULL);
}
assert(all_uninitialized != all_initialized, "consistency"); // either or
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
__ stop("unexpected type", 0x6551);
#endif
// fallthru: __ bind(Latos);
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rthis);
if (branch_table[itos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[itos] = __ pc(); // non-volatile_entry point
__ lwax(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[ltos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[ltos] = __ pc(); // non-volatile_entry point
__ ldx(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[btos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[btos] = __ pc(); // non-volatile_entry point
__ lbzx(R3_RET, Rclass_or_obj, Roffset);
__ extsb(R3_RET, R3_RET);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[ctos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[ctos] = __ pc(); // non-volatile_entry point
__ lhzx(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[stos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[stos] = __ pc(); // non-volatile_entry point
__ lhax(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[atos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[atos] = __ pc(); // non-volatile_entry point
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
__ verify_oop(R3_RET);
//__ dcbt(R3_RET); // prefetch
__ beq(CCR6, Lacquire);
__ blr();
}
__ align(32, 12);
__ bind(Lacquire);
__ twi_0(R3_RET);
__ isync(); // acquire
__ blr();
__ bind(Litos);
__ lwax(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lltos);
__ ldx(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lbtos);
__ lbzx(R3_RET, Rthis, Roffset);
__ extsb(R3_RET, R3_RET);
__ blr();
__ bind(Lctos);
__ lhzx(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lstos);
__ lhax(R3_RET, Rthis, Roffset);
__ blr();
#ifdef ASSERT
for (int i = 0; i<number_of_states; ++i) {
assert(branch_table[i], "accessor_entry initialization");
//tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
}
#endif
__ bind(Lslow_path);
assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now");
@ -670,18 +739,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// continue and the thread will safepoint at the next bytecode dispatch.
// If the receiver is null then it is OK to jump to the slow path.
#ifdef CC_INTERP
__ ld(R3_RET, Interpreter::stackElementSize, R17_tos); // get receiver
#else
Unimplemented();
#endif
__ ld(R3_RET, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp)); // get receiver
// Check if receiver == NULL and go the slow path.
__ cmpdi(CCR0, R3_RET, 0);
__ beq(CCR0, slow_path);
// Load the value of the referent field.
__ load_heap_oop_not_null(R3_RET, referent_offset, R3_RET);
__ load_heap_oop(R3_RET, referent_offset, R3_RET);
// Generate the G1 pre-barrier code to log the value of
// the referent field in an SATB buffer. Note with

View File

@ -40,8 +40,10 @@
#define JNIIMPORT
#endif
#define JNICALL
typedef int jint;
#define JNICALL
typedef int jint;
#if defined(_LP64)
typedef long jlong;
#else

View File

@ -97,8 +97,10 @@ void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Re
}
}
void MacroAssembler::align(int modulus) {
while (offset() % modulus != 0) nop();
void MacroAssembler::align(int modulus, int max, int rem) {
int padding = (rem + modulus - (offset() % modulus)) % modulus;
if (padding > max) return;
for (int c = (padding >> 2); c > 0; --c) { nop(); }
}
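A standalone sketch (hypothetical helper) of the padding computation, with two sample evaluations:

static int align_padding(int offset, int modulus, int rem) {
  return (rem + modulus - (offset % modulus)) % modulus;  // bytes of nops, skipped if > max
}
// align_padding(20, 32, 0)  == 12 -> three nops reach the next 32-byte boundary
// align_padding(20, 32, 28) ==  8 -> afterwards offset % 32 == 28, e.g. so that a
//                                    following 4-byte sync ends exactly at a 32-byte boundary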
// Issue instructions that calculate given TOC from global TOC.
@ -186,16 +188,25 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
#ifdef _LP64
// Patch compressed oops or klass constants.
// Assembler sequence is
// 1) compressed oops:
// lis rx = const.hi
// ori rx = rx | const.lo
// 2) compressed klass:
// lis rx = const.hi
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// The clrldi is skipped over when searching for the preceding lis.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
@ -210,8 +221,9 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
int xc = (data >> 16) & 0xffff;
int xd = (data >> 0) & 0xffff;
set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (short)(xd));
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
}
@ -222,10 +234,10 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding lis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
@ -238,8 +250,9 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
}
assert(inst1_found, "inst is not lis");
uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
return (int) (xl | xh);
}
#endif // _LP64
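A minimal standalone sketch of the hi/lo split behind the lis/ori patching above (with ori, unlike the former addi-based sequence, no carry correction of the high half is needed):

#include <cstdint>

static void split_narrow_oop(uint32_t data, uint16_t* hi, uint16_t* lo) {
  *hi = (uint16_t)(data >> 16);     // immediate of the lis
  *lo = (uint16_t)(data & 0xffff);  // immediate of the ori
}

static uint32_t reassemble_narrow_oop(uint16_t hi_imm, uint16_t lo_imm) {
  return ((uint32_t)hi_imm << 16) | lo_imm;  // what get_narrow_oop recomputes
}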
@ -252,13 +265,10 @@ void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
// FIXME: We should insert relocation information for oops at the constant
// pool entries instead of inserting it at the loads; patching of a constant
// pool entry should be less expensive.
Unimplemented();
if (false) {
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(oop_address - code()->consts()->start());
}
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(oop_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, true);
}
@ -532,7 +542,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
masm.b(dest);
}
}
ICache::invalidate_range(instruction_addr, code_size);
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
@ -673,7 +683,7 @@ void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, ad
CodeBuffer buf(instruction_addr, code_size);
MacroAssembler masm(&buf);
masm.bxx64_patchable(dest, relocInfo::none, link);
ICache::invalidate_range(instruction_addr, code_size);
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Get dest address of a bxx64_patchable instruction.
@ -964,6 +974,14 @@ address MacroAssembler::call_c(Register fd) {
/*load env=*/true);
}
address MacroAssembler::call_c_and_return_to_caller(Register fd) {
return branch_to(fd, /*and_link=*/false,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/true);
}
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
if (rt != relocInfo::none) {
// this call needs to be relocatable
@ -2315,7 +2333,7 @@ void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Ja
if (last_Java_pc != noreg)
std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
// set last_Java_sp last
// Set last_Java_sp last.
std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
}
@ -2454,6 +2472,57 @@ void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
}
}
// Clear Array
// Kills both input registers. tmp == R0 is allowed.
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
// Procedure for large arrays (uses data cache block zero instruction).
Label startloop, fast, fastloop, small_rest, restloop, done;
const int cl_size = VM_Version::get_cache_line_size(),
cl_dwords = cl_size>>3,
cl_dw_addr_bits = exact_log2(cl_dwords),
dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
//2:
cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
blt(CCR1, small_rest); // Too small.
rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
beq(CCR0, fast); // Already 128byte aligned.
subfic(tmp, tmp, cl_dwords);
mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
subf(cnt_dwords, tmp, cnt_dwords); // rest.
li(tmp, 0);
//10:
bind(startloop); // Clear at the beginning to reach 128byte boundary.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(startloop);
//13:
bind(fast); // Clear 128byte blocks.
srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
mtctr(tmp); // Load counter.
//16:
bind(fastloop);
dcbz(base_ptr); // Clear 128byte aligned block.
addi(base_ptr, base_ptr, cl_size);
bdnz(fastloop);
if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
//20:
bind(small_rest);
cmpdi(CCR0, cnt_dwords, 0); // size 0?
beq(CCR0, done); // rest == 0
li(tmp, 0);
mtctr(cnt_dwords); // Load counter.
//24:
bind(restloop); // Clear rest.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(restloop);
//27:
bind(done);
}
/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
// Search for a single jchar in an jchar[].
@ -2926,12 +2995,11 @@ void MacroAssembler::verify_oop(Register oop, const char* msg) {
if (!VerifyOops) {
return;
}
// will be preserved.
// Will be preserved.
Register tmp = R11;
assert(oop != tmp, "precondition");
unsigned int nbytes_save = 10*8; // 10 volatile gprs
address/* FunctionDescriptor** */fd =
StubRoutines::verify_oop_subroutine_entry_address();
address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
// save tmp
mr(R0, tmp);
// kill tmp

View File

@ -58,9 +58,24 @@ class MacroAssembler: public Assembler {
// Move register if destination register and target register are different
inline void mr_if_needed(Register rd, Register rs);
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
// This is dedicated for emitting scheduled mach nodes. For better
// readability of the ad file I put it here.
// Endgroups are not needed if
// - the scheduler is off
// - the scheduler found that there is a natural group end, in that
// case it reduced the size of the instruction used in the test
// yielding 'needed'.
inline void endgroup_if_needed(bool needed);
// Memory barriers.
inline void membar(int bits);
inline void release();
inline void acquire();
inline void fence();
// nop padding
void align(int modulus);
void align(int modulus, int max = 252, int rem = 0);
//
// Constants, loading constants, TOC support
@ -295,6 +310,8 @@ class MacroAssembler: public Assembler {
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address call_c(Register function_descriptor);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_descriptor);
address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
Register toc);
@ -320,7 +337,7 @@ class MacroAssembler: public Assembler {
// the entry point
address entry_point,
// flag which indicates if exception should be checked
bool check_exception=true
bool check_exception = true
);
// Support for VM calls. This is the base routine called by the
@ -530,9 +547,7 @@ class MacroAssembler: public Assembler {
inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
// Check accessed object for null. Use SIGTRAP-based null checks on AIX.
inline void ld_with_trap_null_check(Register d, int si16, Register s1);
// Variant for heap OOPs including decompression of compressed OOPs.
inline void load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant offs, Register s1);
inline void load_with_trap_null_check(Register d, int si16, Register s1);
// Load heap oop and decompress. Loaded oop may not be null.
inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg);
@ -584,6 +599,8 @@ class MacroAssembler: public Assembler {
is_trap_range_check_g(x) || is_trap_range_check_ge(x);
}
void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
// Needle of length 1.
void string_indexof_1(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar,
@ -630,7 +647,7 @@ class MacroAssembler: public Assembler {
// TODO: verify method and klass metadata (compare against vptr?)
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

View File

@ -58,8 +58,25 @@ inline void MacroAssembler::round_to(Register r, int modulus) {
// Move register if destination register and target register are different.
inline void MacroAssembler::mr_if_needed(Register rd, Register rs) {
if(rs !=rd) mr(rd, rs);
if (rs != rd) mr(rd, rs);
}
inline void MacroAssembler::fmr_if_needed(FloatRegister rd, FloatRegister rs) {
if (rs != rd) fmr(rd, rs);
}
inline void MacroAssembler::endgroup_if_needed(bool needed) {
if (needed) {
endgroup();
}
}
inline void MacroAssembler::membar(int bits) {
// TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
// (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
if (bits & StoreLoad) sync(); else lwsync();
}
inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
inline void MacroAssembler::fence() { membar(LoadLoad | LoadStore | StoreLoad | StoreStore); }
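// Resulting mapping: release() -> lwsync (LoadStore | StoreStore),
//                    acquire() -> lwsync (LoadLoad  | LoadStore),
//                    fence()   -> sync   (mask includes StoreLoad).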
// Address of the global TOC.
inline address MacroAssembler::global_toc() {
@ -117,13 +134,12 @@ inline bool MacroAssembler::is_calculate_address_from_global_toc_at(address a, a
inline bool MacroAssembler::is_set_narrow_oop(address a, address bound) {
const address inst2_addr = a;
const int inst2 = *(int *)a;
// The relocation points to the second instruction, the ori.
if (!is_ori(inst2)) return false;
// The relocation points to the second instruction, the addi.
if (!is_addi(inst2)) return false;
// The addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
if (inv_ra_field(inst2) != dst) return false;
// The ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
if (inv_rs_field(inst2) != dst) return false;
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
@ -266,9 +282,10 @@ inline void MacroAssembler::trap_ic_miss_check(Register a, Register b) {
// Do an explicit null check if access to a+offset will not raise a SIGSEGV.
// Either issue a trap instruction that raises SIGTRAP, or do a compare that
// branches to exception_entry.
// No support for compressed oops (base page of heap). Does not distinguish
// No support for compressed oops (base page of heap). Does not distinguish
// loads and stores.
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg, address exception_entry) {
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg,
address exception_entry) {
if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
assert(UseSIGTRAP, "sanity");
@ -285,7 +302,7 @@ inline void MacroAssembler::null_check_throw(Register a, int offset, Register te
}
}
inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Register s1) {
inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
if (!os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(s1);
@ -294,17 +311,6 @@ inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Regist
ld(d, si16, s1);
}
// Attention: No null check for loaded uncompressed OOP. Can be used for loading klass field.
inline void MacroAssembler::load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant si16,
Register s1) {
if ( !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(s1);
}
}
load_heap_oop_not_null(d, si16, s1);
}
inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1) {
if (UseCompressedOops) {
lwz(d, offs, s1);


@ -31,12 +31,16 @@
#define __ _masm->
#ifdef CC_INTERP
#define EXCEPTION_ENTRY StubRoutines::throw_NullPointerException_at_call_entry()
#else
#define EXCEPTION_ENTRY Interpreter::throw_NullPointerException_entry()
#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#define STOP(error) block_comment(error); __ stop(error)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
@ -167,7 +171,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
sizeof(u2), /*is_signed*/ false);
// assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), "");
Label L;
__ ld(temp2, __ argument_offset(temp2, temp2, 0), R17_tos);
__ ld(temp2, __ argument_offset(temp2, temp2, 0), CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
__ cmpd(CCR1, temp2, recv);
__ beq(CCR1, L);
__ stop("receiver not on stack");
@ -194,7 +198,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
return NULL;
}
Register argbase = R17_tos; // parameter (preserved)
Register argbase = CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp); // parameter (preserved)
Register argslot = R3;
Register temp1 = R6;
Register param_size = R7;
@ -271,7 +275,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
Register member_reg,
bool for_compiler_entry) {
assert(is_signature_polymorphic(iid), "expected invoke iid");
Register temp1 = (for_compiler_entry ? R21_tmp1 : R7);
Register temp1 = (for_compiler_entry ? R25_tmp5 : R7);
Register temp2 = (for_compiler_entry ? R22_tmp2 : R8);
Register temp3 = (for_compiler_entry ? R23_tmp3 : R9);
Register temp4 = (for_compiler_entry ? R24_tmp4 : R10);
@ -295,11 +299,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
__ verify_oop(receiver_reg);
if (iid == vmIntrinsics::_linkToSpecial) {
// Don't actually load the klass; just null-check the receiver.
__ null_check_throw(receiver_reg, 0, temp1, StubRoutines::throw_NullPointerException_at_call_entry());
__ null_check_throw(receiver_reg, -1, temp1, EXCEPTION_ENTRY);
} else {
// load receiver klass itself
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(),
temp1, StubRoutines::throw_NullPointerException_at_call_entry());
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(), temp1, EXCEPTION_ENTRY);
__ load_klass(temp1_recv_klass, receiver_reg);
__ verify_klass_ptr(temp1_recv_klass);
}
@ -451,7 +454,7 @@ void trace_method_handle_stub(const char* adaptername,
if (Verbose) {
tty->print_cr("Registers:");
const int abi_offset = frame::abi_112_size / 8;
for (int i = R3->encoding(); i <= R13->encoding(); i++) {
for (int i = R3->encoding(); i <= R12->encoding(); i++) {
Register r = as_Register(i);
int count = i - R3->encoding();
// The registers are stored in reverse order on the stack (by save_volatile_gprs(R1_SP, abi_112_size)).
@ -490,7 +493,7 @@ void trace_method_handle_stub(const char* adaptername,
trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
}
// safely create a frame and call frame::describe
// Safely create a frame and call frame::describe.
intptr_t *dump_sp = trace_calling_frame.sender_sp();
frame dump_frame = frame(dump_sp);
@ -531,7 +534,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt
__ mr(R6_ARG4, R1_SP);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
__ restore_volatile_gprs(R1_SP, 112); // except R0
__ restore_volatile_gprs(R1_SP, 112); // Except R0.
__ pop_frame();
__ restore_LR_CR(R0);


@ -118,7 +118,7 @@ void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
a->bl(trampoline_stub_addr);
}
ICache::invalidate_range(addr_call, code_size);
ICache::ppc64_flush_icache_bytes(addr_call, code_size);
}
address NativeCall::get_trampoline() {
@ -182,11 +182,13 @@ address NativeMovConstReg::next_instruction_address() const {
intptr_t NativeMovConstReg::data() const {
address addr = addr_at(0);
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_load_const_at(addr)) {
return MacroAssembler::get_const(addr);
} else if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
}
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
narrowOop no = (narrowOop)MacroAssembler::get_narrow_oop(addr, cb->content_begin());
return cast_from_oop<intptr_t>(oopDesc::decode_heap_oop(no));
} else {
@ -213,19 +215,24 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
} else if (cb != NULL &&
MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) {
// A calculation relative to the global TOC.
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::invalidate_range(start, range);
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
// A normal 5 instruction load_const code sequence.
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
MacroAssembler::patch_const(addr, (long)data);
ICache::invalidate_range(addr, load_const_instruction_size);
if (MacroAssembler::get_const(addr) != (long)data) {
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
MacroAssembler::patch_const(addr, (long)data);
ICache::ppc64_flush_icache_bytes(addr, load_const_instruction_size);
}
next_address = addr + 5 * BytesPerInstWord;
} else if (MacroAssembler::is_bl(* (int*) addr)) {
// A single branch-and-link instruction.
@ -234,7 +241,7 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
CodeBuffer cb(addr, code_size + 1);
MacroAssembler* a = new MacroAssembler(&cb);
a->bl((address) data);
ICache::invalidate_range(addr, code_size);
ICache::ppc64_flush_icache_bytes(addr, code_size);
next_address = addr + code_size;
} else {
ShouldNotReachHere();
@ -279,12 +286,13 @@ void NativeMovConstReg::set_data(intptr_t data) {
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
address addr = addr_at(0);
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
const int invalidated_range =
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::invalidate_range(start, range);
ICache::ppc64_flush_icache_bytes(start, range);
}
// Do not use an assertion here. Let clients decide whether they only
@ -292,15 +300,16 @@ void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL
#ifdef ASSERT
void NativeMovConstReg::verify() {
address addr = addr_at(0);
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
if (! MacroAssembler::is_load_const_at(addr) &&
! MacroAssembler::is_load_const_from_method_toc_at(addr) &&
! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
! MacroAssembler::is_bl(*((int*) addr))) {
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
// TODO: PPC port Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
! MacroAssembler::is_load_const_from_method_toc_at(addr)) {
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
if (! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
! MacroAssembler::is_bl(*((int*) addr))) {
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
}
}
}
#endif // ASSERT
@ -326,7 +335,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add
a->illtrap();
}
}
ICache::invalidate_range(verified_entry, code_size);
ICache::ppc64_flush_icache_bytes(verified_entry, code_size);
}
#ifdef ASSERT


@ -132,7 +132,7 @@ inline NativeInstruction* nativeInstruction_at(address address) {
class NativeCall: public NativeInstruction {
public:
enum specific_constants {
enum ppc_specific_constants {
load_const_instruction_size = 28,
load_const_from_method_toc_instruction_size = 16,
instruction_size = 16 // Used in shared code for calls with reloc_info.
@ -240,7 +240,7 @@ inline NativeFarCall* nativeFarCall_at(address instr) {
class NativeMovConstReg: public NativeInstruction {
public:
enum specific_constants {
enum ppc_specific_constants {
load_const_instruction_size = 20,
load_const_from_method_toc_instruction_size = 8,
instruction_size = 8 // Used in shared code for calls with reloc_info.
@ -279,7 +279,7 @@ class NativeJump: public NativeInstruction {
// We use MacroAssembler::b64_patchable() for implementing a
// jump-anywhere instruction.
enum specific_constants {
enum ppc_specific_constants {
instruction_size = MacroAssembler::b64_patchable_size
};
@ -384,7 +384,6 @@ class NativeCallTrampolineStub : public NativeInstruction {
void set_destination(address new_destination);
};
inline bool is_NativeCallTrampolineStub_at(address address) {
int first_instr = *(int*)address;
return Assembler::is_addis(first_instr) &&

hotspot/src/cpu/ppc/vm/ppc.ad (new file, 12059 lines; diff suppressed because it is too large)

@ -0,0 +1,24 @@
//
// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
// Copyright 2012, 2013 SAP AG. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//


@ -30,8 +30,8 @@
#include "asm/macroAssembler.hpp"
#include "asm/register.hpp"
#include "register_ppc.hpp"
#ifdef TARGET_ARCH_MODEL_32
# include "interp_masm_32.hpp"
#ifdef TARGET_ARCH_MODEL_ppc_32
# include "interp_masm_ppc_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc_64
# include "interp_masm_ppc_64.hpp"


@ -44,7 +44,7 @@ const char* RegisterImpl::name() const {
const char* ConditionRegisterImpl::name() const {
const char* names[number_of_registers] = {
"CR0", "CR1", "CR2", "CR3", "CCR4", "CCR5", "CCR6", "CCR7"
"CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7"
};
return is_valid() ? names[encoding()] : "cnoreg";
}
@ -61,7 +61,7 @@ const char* FloatRegisterImpl::name() const {
const char* SpecialRegisterImpl::name() const {
const char* names[number_of_registers] = {
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "R1_SPEFSCR", "SR_PPR"
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "SR_SPEFSCR", "SR_PPR"
};
return is_valid() ? names[encoding()] : "snoreg";
}


@ -60,8 +60,8 @@ typedef VMRegImpl* VMReg;
// FPSCR Floating point status and control register (volatile)
//
// CR0-CR1 Condition code fields (volatile)
// CR2-CCR4 Condition code fields (nonvolatile)
// CCR5-CCR7 Condition code fields (volatile)
// CR2-CR4 Condition code fields (nonvolatile)
// CR5-CR7 Condition code fields (volatile)
//
// ----------------------------------------------
// On processors with the VMX feature:
@ -531,7 +531,7 @@ REGISTER_DECLARATION(Register, R7_ARG5, R7); // volatile
REGISTER_DECLARATION(Register, R8_ARG6, R8); // volatile
REGISTER_DECLARATION(Register, R9_ARG7, R9); // volatile
REGISTER_DECLARATION(Register, R10_ARG8, R10); // volatile
REGISTER_DECLARATION(FloatRegister, FO_SCRATCH, F0); // volatile
REGISTER_DECLARATION(FloatRegister, F0_SCRATCH, F0); // volatile
REGISTER_DECLARATION(FloatRegister, F1_RET, F1); // volatile
REGISTER_DECLARATION(FloatRegister, F1_ARG1, F1); // volatile
REGISTER_DECLARATION(FloatRegister, F2_ARG2, F2); // volatile
@ -560,7 +560,7 @@ REGISTER_DECLARATION(FloatRegister, F13_ARG13, F13); // volatile
#define R8_ARG6 AS_REGISTER(Register, R8)
#define R9_ARG7 AS_REGISTER(Register, R9)
#define R10_ARG8 AS_REGISTER(Register, R10)
#define FO_SCRATCH AS_REGISTER(FloatRegister, F0)
#define F0_SCRATCH AS_REGISTER(FloatRegister, F0)
#define F1_RET AS_REGISTER(FloatRegister, F1)
#define F1_ARG1 AS_REGISTER(FloatRegister, F1)
#define F2_ARG2 AS_REGISTER(FloatRegister, F2)
@ -608,7 +608,6 @@ REGISTER_DECLARATION(Register, R26_tmp6, R26);
REGISTER_DECLARATION(Register, R27_tmp7, R27);
REGISTER_DECLARATION(Register, R28_tmp8, R28);
REGISTER_DECLARATION(Register, R29_tmp9, R29);
REGISTER_DECLARATION(Register, R30_polling_page, R30);
#ifndef DONT_USE_REGISTER_DEFINES
#define R21_tmp1 AS_REGISTER(Register, R21)
#define R22_tmp2 AS_REGISTER(Register, R22)
@ -619,7 +618,6 @@ REGISTER_DECLARATION(Register, R30_polling_page, R30);
#define R27_tmp7 AS_REGISTER(Register, R27)
#define R28_tmp8 AS_REGISTER(Register, R28)
#define R29_tmp9 AS_REGISTER(Register, R29)
#define R30_polling_page AS_REGISTER(Register, R30)
#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
#endif


@ -0,0 +1,183 @@
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#ifdef COMPILER2
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/vmreg.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_ppc.hpp"
#include "opto/runtime.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vmreg_ppc.inline.hpp"
#endif
#define __ masm->
#ifdef COMPILER2
// SP adjustment (must use unextended SP) for method handle call sites
// during exception handling.
static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
RegisterMap map(thread, false);
// The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
frame mh_caller_frame = thread->last_frame().sender(&map);
assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
return (intptr_t) mh_caller_frame.unextended_sp();
}
//------------------------------generate_exception_blob---------------------------
// Creates exception blob at the end.
// Using exception blob, this code is jumped from a compiled method.
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers) unwind the frame and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
// R3_ARG1: exception oop
// R4_ARG2: exception pc
//
// Results:
// R3_ARG1: exception oop
// R4_ARG2: exception pc in caller
// destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//
void OptoRuntime::generate_exception_blob() {
// Allocate space for the code.
ResourceMark rm;
// Setup code generation tools.
CodeBuffer buffer("exception_blob", 2048, 1024);
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
address start = __ pc();
int frame_size_in_bytes = frame::abi_112_size;
OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
// Exception pc is 'return address' for stack walker.
__ std(R4_ARG2/*exception pc*/, _abi(lr), R1_SP);
// Store the exception in the Thread object.
__ std(R3_ARG1/*exception oop*/, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ std(R4_ARG2/*exception pc*/, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
// Save callee-saved registers.
// Push a C frame for the exception blob. It is needed for the C call later on.
__ push_frame_abi112(0, R11_scratch1);
// This call does all the hard work. It checks if an exception handler
// exists in the method.
// If so, it returns the handler address.
// If not, it prepares for stack-unwinding, restoring the callee-save
// registers of the frame being removed.
__ set_last_Java_frame(/*sp=*/R1_SP, noreg);
__ mr(R3_ARG1, R16_thread);
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, OptoRuntime::handle_exception_C),
relocInfo::none);
address calls_return_pc = __ last_calls_return_pc();
# ifdef ASSERT
__ cmpdi(CCR0, R3_RET, 0);
__ asm_assert_ne("handle_exception_C must not return NULL", 0x601);
# endif
// Set an oopmap for the call site. This oopmap will only be used if we
// are unwinding the stack. Hence, all locations will be dead.
// Callee-saved registers will be the same as the frame above (i.e.,
// handle_exception_stub), since they were restored when we got the
// exception.
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(calls_return_pc - start, map);
// Get unextended_sp for method handle call sites.
Label mh_callsite, mh_done; // Use a 2nd c call if it's a method handle call site.
__ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
__ cmpwi(CCR0, R4_ARG2, 0);
__ bne(CCR0, mh_callsite);
__ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ pop_frame();
__ bind(mh_done);
// We have a handler in register SR_CTR (could be deopt blob).
// Get the exception oop.
__ ld(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
// Get the exception pc in case we are deoptimized.
__ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
// Reset thread values.
__ li(R0, 0);
#ifdef ASSERT
__ std(R0, in_bytes(JavaThread::exception_handler_pc_offset()), R16_thread);
__ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
#endif
// Clear the exception oop so GC no longer processes it as a root.
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
// Move exception pc into SR_LR.
__ mtlr(R4_ARG2);
__ bctr();
// Same as above, but also set sp to unextended_sp.
__ bind(mh_callsite);
__ mr(R31, R3_RET); // Save branch address.
__ mr(R3_ARG1, R16_thread);
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
// Returns unextended_sp in R3_RET.
__ mtctr(R31); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
__ b(mh_done);
// Make sure all code is generated.
masm->flush();
// Set exception blob.
_exception_blob = ExceptionBlob::create(&buffer, oop_maps,
frame_size_in_bytes/wordSize);
}
#endif // COMPILER2


@ -687,17 +687,9 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
F13->as_VMReg()
};
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
// The first 8 arguments are not passed on the stack.
const int num_args_in_regs = 8;
#define put_arg_in_reg(arg) ((arg) < num_args_in_regs)
// Check calling conventions consistency.
assert(num_iarg_registers == num_args_in_regs
&& num_iarg_registers == 8
&& num_farg_registers == 13,
assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
"consistency");
// `Stk' counts stack slots. Due to alignment, 32 bit values occupy
@ -705,8 +697,6 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats
const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
int ill_i = 0;
int ill_t = 0;
int i;
VMReg reg;
// Leave room for C-compatible ABI_112.
@ -726,6 +716,11 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
if (regs2 != NULL) regs2[i].set_bad();
switch(sig_bt[i]) {
//
// If arguments 0-7 are integers, they are passed in integer registers.
// Argument i is placed in iarg_reg[i].
//
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
@ -754,7 +749,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
case T_ADDRESS:
case T_METADATA:
// Oops are already boxed if required (JNI).
if (put_arg_in_reg(arg)) {
if (arg < Argument::n_int_register_parameters_c) {
reg = iarg_reg[arg];
} else {
reg = VMRegImpl::stack2reg(stk);
@ -762,57 +757,66 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
}
regs[i].set2(reg);
break;
//
// Floats are treated differently from int regs: The first 13 float arguments
// are passed in registers (not the float args among the first 13 args).
// Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
// in farg_reg[j] if argument i is the j-th float argument of this call.
//
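// Worked example (hypothetical C signature): f(i0 .. i7 /* 8 ints */, float f8, double d9).
// - i0 .. i7: arg < n_int_register_parameters_c, so each goes into iarg_reg[0..7] (R3..R10).
// - f8: first float argument of the call, so freg == 0 and it is passed in farg_reg[0]
//   (F1 on this ABI); because its argument index 8 >= n_regs_not_on_stack_c it also
//   gets a stack slot via regs2.
// - d9: second float argument, so farg_reg[1] (F2), plus a stack slot for the same reason.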
case T_FLOAT:
if (put_arg_in_reg(arg)) {
if (freg < Argument::n_float_register_parameters_c) {
// Put float in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a float arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
regs2[i].set1(reg2);
stk += inc_stk_for_intfloat;
}
} else {
// Put float on stack
# if defined(LINUX)
reg = VMRegImpl::stack2reg(stk+1);
# elif defined(AIX)
reg = VMRegImpl::stack2reg(stk);
# else
# error "unknown OS"
# endif
// Put float on stack.
reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
stk += inc_stk_for_intfloat;
}
if (freg < num_farg_registers) {
// There are still some float argument registers left. Put the
// float in a register if not already done.
if (reg != farg_reg[freg]) {
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = farg_reg[freg];
regs2[i].set1(reg2);
}
++freg;
}
regs[i].set1(reg);
break;
case T_DOUBLE:
assert(sig_bt[i+1] == T_VOID, "expecting half");
if (put_arg_in_reg(arg)) {
if (freg < Argument::n_float_register_parameters_c) {
// Put double in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a double arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk);
regs2[i].set2(reg2);
stk += inc_stk_for_longdouble;
}
} else {
// Put double on stack.
reg = VMRegImpl::stack2reg(stk);
stk += inc_stk_for_longdouble;
}
if (freg < num_farg_registers) {
// There are still some float argument registers left. Put the
// float in a register if not already done.
if (reg != farg_reg[freg]) {
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = farg_reg[freg];
regs2[i].set2(reg2);
}
++freg;
}
regs[i].set2(reg);
break;
case T_VOID:
// Do not count halves.
regs[i].set_bad();
@ -877,7 +881,7 @@ static address gen_c2i_adapter(MacroAssembler *masm,
__ mtlr(return_pc);
// call the interpreter
// Call the interpreter.
__ BIND(call_interpreter);
__ mtctr(ientry);
@ -947,8 +951,12 @@ static address gen_c2i_adapter(MacroAssembler *masm,
// Jump to the interpreter just as if interpreter was doing it.
#ifdef CC_INTERP
const Register tos = R17_tos;
#endif
// load TOS
__ addi(R17_tos, R1_SP, st_off);
__ addi(tos, R1_SP, st_off);
// Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
@ -982,7 +990,9 @@ static void gen_i2c_adapter(MacroAssembler *masm,
// save code can segv when fxsave instructions find improperly
// aligned stack pointer.
#ifdef CC_INTERP
const Register ld_ptr = R17_tos;
#endif
const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
const int num_value_regs = sizeof(value_regs) / sizeof(Register);
int value_regs_index = 0;
@ -1137,7 +1147,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ bne_predict_taken(CCR0, valid);
// We have a null argument, branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
relocInfo::runtime_call_type);
__ BIND(valid);
}
}
@ -1154,7 +1164,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ beq_predict_taken(CCR0, valid);
// We have an unexpected klass, branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
relocInfo::runtime_call_type);
__ BIND(valid);
}
@ -1170,8 +1180,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ beq_predict_taken(CCR0, call_interpreter);
// Branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
// entry: c2i
@ -2594,7 +2603,11 @@ static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
__ ld(frame_size_reg, 0, frame_sizes_reg);
__ std(pc_reg, _abi(lr), R1_SP);
__ push_frame(frame_size_reg, R0/*tmp*/);
#ifdef CC_INTERP
__ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
#else
Unimplemented();
#endif
__ addi(number_of_frames_reg, number_of_frames_reg, -1);
__ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
__ addi(pcs_reg, pcs_reg, wordSize);
@ -2693,7 +2706,9 @@ static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
// Store it in the top interpreter frame.
__ std(R0, _abi(lr), R1_SP);
// Initialize frame_manager_lr of interpreter top frame.
#ifdef CC_INTERP
__ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
#endif
}
#endif
@ -2886,8 +2901,7 @@ void SharedRuntime::generate_deopt_blob() {
// Initialize R14_state.
__ ld(R14_state, 0, R1_SP);
__ addi(R14_state, R14_state,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
// Also initialize R15_prev_state.
__ restore_prev_state();
@ -3010,8 +3024,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
// Initialize R14_state, ...
__ ld(R11_scratch1, 0, R1_SP);
__ addi(R14_state, R11_scratch1,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
// also initialize R15_prev_state.
__ restore_prev_state();
// Return to the interpreter entry point.


@ -146,14 +146,14 @@ class StubGenerator: public StubCodeGenerator {
// FIXME: use round_to() here
__ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1);
__ sldi(r_frame_alignment_in_bytes,
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
// size = unaligned size of arguments + top abi's size
__ addi(r_frame_size, r_argument_size_in_bytes,
frame::top_ijava_frame_abi_size);
// size += arguments alignment
__ add(r_frame_size,
r_frame_size, r_frame_alignment_in_bytes);
r_frame_size, r_frame_alignment_in_bytes);
// size += size of call_stub locals
__ addi(r_frame_size,
r_frame_size, frame::entry_frame_locals_size);
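// Worked example (hypothetical, assuming 8-byte stack elements): for 3 Java arguments the
// argument area is 24 bytes; the odd count adds 8 bytes of alignment, so
//   r_frame_size = 24 + 8 + frame::top_ijava_frame_abi_size + frame::entry_frame_locals_size,
// with the extra 8 bytes keeping the copied argument area 16-byte aligned.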
@ -179,7 +179,7 @@ class StubGenerator: public StubCodeGenerator {
__ addi(r_top_of_arguments_addr,
R1_SP, frame::top_ijava_frame_abi_size);
__ add(r_top_of_arguments_addr,
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
// any arguments to copy?
__ cmpdi(CCR0, r_arg_argument_count, 0);
@ -229,22 +229,23 @@ class StubGenerator: public StubCodeGenerator {
// Register state on entry to frame manager / native entry:
//
// R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
// tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
// R19_method - Method
// R16_thread - JavaThread*
// R17_tos must point to last argument - element_size.
__ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
// Tos must point to last argument - element_size.
const Register tos = R17_tos;
__ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
// initialize call_stub locals (step 2)
// now save R17_tos as arguments_tos_address
__ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
// now save tos as arguments_tos_address
__ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
// load argument registers for call
__ mr(R19_method, r_arg_method);
__ mr(R16_thread, r_arg_thread);
assert(R17_tos != r_arg_method, "trashed r_arg_method");
assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
assert(tos != r_arg_method, "trashed r_arg_method");
assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
// Set R15_prev_state to 0 for simplifying checks in callee.
__ li(R15_prev_state, 0);
@ -274,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
// Do a light-weight C-call here, r_new_arg_entry holds the address
// of the interpreter entry point (frame manager or native entry)
// and save runtime-value of LR in return_address.
assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
"trashed r_new_arg_entry");
return_address = __ call_stub(r_new_arg_entry);
}
@ -326,8 +327,8 @@ class StubGenerator: public StubCodeGenerator {
// T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT.
__ cmpwi(CCR0, r_arg_result_type, T_OBJECT);
__ cmpwi(CCR1, r_arg_result_type, T_LONG);
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
// restore non-volatile registers
__ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
@ -345,8 +346,8 @@ class StubGenerator: public StubCodeGenerator {
__ beq(CCR0, ret_is_object);
__ beq(CCR1, ret_is_long);
__ beq(CCR5, ret_is_float);
__ beq(CCR6, ret_is_double);
__ beq(CCR5, ret_is_float);
__ beq(CCR6, ret_is_double);
// default:
__ stw(R3_RET, 0, r_arg_result_addr);
@ -614,6 +615,17 @@ class StubGenerator: public StubCodeGenerator {
if (!dest_uninitialized) {
const int spill_slots = 4 * wordSize;
const int frame_size = frame::abi_112_size + spill_slots;
Label filtered;
// Is marking active?
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
__ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
__ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
}
__ cmpdi(CCR0, Rtmp1, 0);
__ beq(CCR0, filtered);
__ save_LR_CR(R0);
__ push_frame_abi112(spill_slots, R0);
@ -628,6 +640,8 @@ class StubGenerator: public StubCodeGenerator {
__ ld(count, frame_size - 3 * wordSize, R1_SP);
__ pop_frame();
__ restore_LR_CR(R0);
__ bind(filtered);
}
break;
case BarrierSet::CardTableModRef:
@ -648,21 +662,28 @@ class StubGenerator: public StubCodeGenerator {
//
// The input registers and R0 are overwritten.
//
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
BarrierSet* const bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ save_LR_CR(R0);
// We need this frame only that the callee can spill LR/CR.
__ push_frame_abi112(0, R0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
__ pop_frame();
__ restore_LR_CR(R0);
if (branchToEnd) {
__ save_LR_CR(R0);
// We need this frame only to spill LR.
__ push_frame_abi112(0, R0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
__ pop_frame();
__ restore_LR_CR(R0);
} else {
// Tail call: fake call from stub caller by branching without linking.
address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
__ mr_if_needed(R3_ARG1, addr);
__ mr_if_needed(R4_ARG2, count);
__ load_const(R11, entry_point, R0);
__ call_c_and_return_to_caller(R11);
}
}
break;
case BarrierSet::CardTableModRef:
@ -697,9 +718,12 @@ class StubGenerator: public StubCodeGenerator {
__ addi(addr, addr, 1);
__ bdnz(Lstore_loop);
__ bind(Lskip_loop);
if (!branchToEnd) __ blr();
}
break;
case BarrierSet::ModRef:
if (!branchToEnd) __ blr();
break;
default:
ShouldNotReachHere();
@ -847,30 +871,28 @@ class StubGenerator: public StubCodeGenerator {
// The code is implemented (ported from sparc) as we believe it benefits JVM98, however
// tracing (-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all!
//
// Source code in function is_range_check_if() shows OptimizeFill relaxed the condition
// Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition
// for turning on loop predication optimization, and hence the behavior of "array range check"
// and "loop invariant check" could be influenced, which potentially boosted JVM98.
//
// We leave the code here and see if Oracle has updates in later releases (later than HS20).
//
// Generate stub for disjoint short fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
// Generate stub for disjoint short fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
// to: R3_ARG1
// value: R4_ARG2
// count: R5_ARG3 treated as signed
// to: R3_ARG1
// value: R4_ARG2
// count: R5_ARG3 treated as signed
//
address generate_fill(BasicType t, bool aligned, const char* name) {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ emit_fd();
const Register to = R3_ARG1; // source array address
const Register value = R4_ARG2; // fill value
const Register count = R5_ARG3; // elements count
const Register temp = R6_ARG4; // temp register
const Register to = R3_ARG1; // source array address
const Register value = R4_ARG2; // fill value
const Register count = R5_ARG3; // elements count
const Register temp = R6_ARG4; // temp register
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
@ -879,31 +901,31 @@ class StubGenerator: public StubCodeGenerator {
switch (t) {
case T_BYTE:
shift = 2;
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
__ rldimi(value, value, 8, 48); // 8 bit -> 16 bit
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_elements);
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
break;
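// Example of the replication above (hypothetical fill value 0x41):
//   after rldimi(value, value, 8, 48):  value = 0x4141       (byte -> halfword)
//   after rldimi(value, value, 16, 32): value = 0x41414141   (halfword -> word)
// The later rldimi(value, value, 32, 0) in the 32-byte loop widens this to
// 0x4141414141414141 so whole doublewords can be stored with std.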
case T_SHORT:
shift = 1;
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_elements);
break;
case T_INT:
shift = 0;
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_4_bytes);
break;
default: ShouldNotReachHere();
}
if (!aligned && (t == T_BYTE || t == T_SHORT)) {
// align source address at 4 bytes address boundary
// Align source address at 4 bytes address boundary.
if (t == T_BYTE) {
// One byte misalignment happens only for byte arrays
// One byte misalignment happens only for byte arrays.
__ andi_(temp, to, 1);
__ beq(CCR0, L_skip_align1);
__ stb(value, 0, to);
@ -930,12 +952,12 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L_fill_32_bytes);
}
__ li(temp, 8<<shift); // prepare for 32 byte loop
// clone bytes int->long as above
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
__ li(temp, 8<<shift); // Prepare for 32 byte loop.
// Clone bytes int->long as above.
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
Label L_check_fill_8_bytes;
// Fill 32-byte chunks
// Fill 32-byte chunks.
__ subf_(count, temp, count);
__ blt(CCR0, L_check_fill_8_bytes);
@ -945,7 +967,7 @@ class StubGenerator: public StubCodeGenerator {
__ std(value, 0, to);
__ std(value, 8, to);
__ subf_(count, temp, count); // update count
__ subf_(count, temp, count); // Update count.
__ std(value, 16, to);
__ std(value, 24, to);
@ -968,7 +990,7 @@ class StubGenerator: public StubCodeGenerator {
__ addi(to, to, 8);
__ bge(CCR0, L_fill_8_bytes_loop);
// fill trailing 4 bytes
// Fill trailing 4 bytes.
__ bind(L_fill_4_bytes);
__ andi_(temp, count, 1<<shift);
__ beq(CCR0, L_fill_2_bytes);
@ -976,14 +998,14 @@ class StubGenerator: public StubCodeGenerator {
__ stw(value, 0, to);
if (t == T_BYTE || t == T_SHORT) {
__ addi(to, to, 4);
// fill trailing 2 bytes
// Fill trailing 2 bytes.
__ bind(L_fill_2_bytes);
__ andi_(temp, count, 1<<(shift-1));
__ beq(CCR0, L_fill_byte);
__ sth(value, 0, to);
if (t == T_BYTE) {
__ addi(to, to, 2);
// fill trailing byte
// Fill trailing byte.
__ bind(L_fill_byte);
__ andi_(count, count, 1);
__ beq(CCR0, L_exit);
@ -997,7 +1019,7 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L_exit);
__ blr();
// Handle copies less than 8 bytes. Int is handled elsewhere.
// Handle copies less than 8 bytes. Int is handled elsewhere.
if (t == T_BYTE) {
__ bind(L_fill_elements);
Label L_fill_2, L_fill_4;
@ -1039,7 +1061,7 @@ class StubGenerator: public StubCodeGenerator {
}
// Generate overlap test for array copy stubs
// Generate overlap test for array copy stubs.
//
// Input:
// R3_ARG1 - from
@ -1873,10 +1895,7 @@ class StubGenerator: public StubCodeGenerator {
generate_conjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ blr();
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}
@ -1906,9 +1925,7 @@ class StubGenerator: public StubCodeGenerator {
generate_disjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ blr();
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}


@ -89,16 +89,17 @@ void VM_Version::initialize() {
}
// On Power6 test for section size.
if (PowerArchitecturePPC64 == 6)
if (PowerArchitecturePPC64 == 6) {
determine_section_size();
// TODO: PPC port else
// TODO: PPC port } else {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // max 16 chars per feature
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
@ -127,21 +128,21 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
if (AllocatePrefetchStyle == 4) {
AllocatePrefetchStepSize = cache_line_size; // need exact value
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ?
AllocatePrefetchStepSize = cache_line_size; // Need exact value.
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
} else {
if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ?
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
}
assert(AllocatePrefetchLines > 0, "invalid value");
if (AllocatePrefetchLines < 1) // Set valid value in product VM.
AllocatePrefetchLines = 1; // Conservative value
AllocatePrefetchLines = 1; // Conservative value.
if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
AllocatePrefetchStyle = 1; // fall back if inappropriate
AllocatePrefetchStyle = 1; // Fall back if inappropriate.
assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
}
@ -160,13 +161,13 @@ void VM_Version::determine_section_size() {
const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;
// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_section_size", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
uint32_t *code = (uint32_t *)a->pc();
// emit code.
// Emit code.
void (*test1)() = (void(*)())(void *)a->emit_fd();
Label l1;
@ -189,58 +190,58 @@ void VM_Version::determine_section_size() {
// ;; 1
a->nop(); // 5
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->endgroup(); // 8
// ------- sector 8 ------------
// ;; 2
a->nop(); // 9
a->nop(); // 10
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12
// ;; 3
a->nop(); // 13
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->endgroup(); // 16
// -------- sector 16 -------------
// ;; 4
a->nop(); // 17
a->nop(); // 18
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20
// ;; 5
a->nop(); // 21
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->endgroup(); // 24
// ------- sector 24 ------------
// ;; 6
a->nop(); // 25
a->nop(); // 26
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28
// ;; 7
a->nop(); // 29
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->brnop0(); // 32
// ------- sector 32 ------------
}
// ;; 8
a->cmpdi(CCR0, R4, unroll);// 33
a->bge(CCR0, l1); // 34
a->cmpdi(CCR0, R4, unroll); // 33
a->bge(CCR0, l1); // 34
a->blr();
// emit code.
// Emit code.
void (*test2)() = (void(*)())(void *)a->emit_fd();
// uint32_t *code = (uint32_t *)a->pc();
@ -382,39 +383,40 @@ void VM_Version::determine_section_size() {
#endif // COMPILER2
void VM_Version::determine_features() {
const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction)
// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (num_features+1+2*7)*BytesPerInstWord;
int features = 0;
// create test area
enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size)
enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
char test_area[BUFFER_SIZE];
char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_cpu_features", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
// emit code.
// Emit code.
void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd();
uint32_t *code = (uint32_t *)a->pc();
// Don't use R0 in ldarx.
// keep R3_ARG1 = R3 unmodified, it contains &field (see below)
// keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below)
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
a->blr();
// Emit function to set one cache line to zero
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it
a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd();
a->dcbz(R3_ARG1); // R3_ARG1 = addr
a->blr();
uint32_t *code_end = (uint32_t *)a->pc();
@ -428,8 +430,8 @@ void VM_Version::determine_features() {
}
// Measure cache line size.
memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF
(*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle
memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
(*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
int count = 0; // count zeroed bytes
for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
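// Example: on a processor whose dcbz zeroes a 128-byte block, exactly 128 of the
// 0xFF-filled bytes read back as zero, so count == 128 and that power of two is
// taken as the measured data cache line size.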


@ -113,7 +113,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error
__ ld_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
@ -147,7 +147,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
// Entry arguments:
// R19_method: Interface
// R3_ARG1: Receiver
// R3_ARG1: Receiver
//
const Register rcvr_klass = R11_scratch1;


@ -34,114 +34,114 @@
//
// Machine barrier instructions:
//
// - ppc_sync Two-way memory barrier, aka fence.
// - ppc_lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - ppc_eieio orders Store|Store
// - ppc_isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - ppc_release orders Store|Store, (maps to ppc_lwsync)
// Load|Store
// - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
// Load|Load
// - ppc_fence orders Store|Store, (maps to ppc_sync)
// Load|Store,
// Load|Load,
// Store|Load
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//
#define inlasm_ppc_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_ppc_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_ppc_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_ppc_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_ppc_release() inlasm_ppc_lwsync();
#define inlasm_ppc_acquire() inlasm_ppc_lwsync();
#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_release() inlasm_lwsync();
#define inlasm_acquire() inlasm_lwsync();
// Use twi-isync for load_acquire (faster than lwsync).
// ATTENTION: seems like xlC 10.1 has problems with this inline assembler macro (VerifyMethodHandles found "bad vminfo in AMH.conv"):
// #define inlasm_ppc_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_ppc_acquire_reg(X) inlasm_ppc_lwsync();
#define inlasm_ppc_fence() inlasm_ppc_sync();
// #define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_acquire_reg(X) inlasm_lwsync();
#define inlasm_fence() inlasm_sync();
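// Illustrative use of the primitives defined in this file (hypothetical example; g_data
// and g_ready are made-up globals, not part of the port): publishing a value to another
// thread with release/acquire semantics.
//
//   static volatile jint g_data  = 0;
//   static volatile jint g_ready = 0;
//
//   void publisher() {
//     g_data = 42;
//     OrderAccess::release_store(&g_ready, 1);              // lwsync, then the store
//   }
//
//   jint consumer() {
//     while (OrderAccess::load_acquire(&g_ready) == 0) { }  // load, then acquire barrier
//     return g_data;                                        // observes 42 once g_ready reads as 1
//   }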
inline void OrderAccess::loadload() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storestore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storeload() { inlasm_ppc_fence(); }
inline void OrderAccess::loadload() { inlasm_lwsync(); }
inline void OrderAccess::storestore() { inlasm_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_lwsync(); }
inline void OrderAccess::storeload() { inlasm_fence(); }
inline void OrderAccess::acquire() { inlasm_ppc_acquire(); }
inline void OrderAccess::release() { inlasm_ppc_release(); }
inline void OrderAccess::fence() { inlasm_ppc_fence(); }
inline void OrderAccess::acquire() { inlasm_acquire(); }
inline void OrderAccess::release() { inlasm_release(); }
inline void OrderAccess::fence() { inlasm_fence(); }
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_acquire_reg(t); return t; }
inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_ppc_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_ppc_acquire(); return t; }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; }
inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; }
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); }
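// Illustrative usage sketch (not part of this change; variable and function
// names are invented for the example): a release_store / load_acquire pairing
// that publishes a payload guarded by a flag, using the primitives above.
static volatile jint example_payload = 0;
static volatile jint example_ready   = 0;

static inline void example_publish(jint v) {
  example_payload = v;                            // plain store of the data
  OrderAccess::release_store(&example_ready, 1);  // lwsync, then store the flag
}

static inline jint example_consume() {
  if (OrderAccess::load_acquire(&example_ready)) {  // load flag, then acquire barrier
    return example_payload;                         // observes the published value
  }
  return 0;                                         // not published yet
}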
#undef inlasm_ppc_sync
#undef inlasm_ppc_lwsync
#undef inlasm_ppc_eieio
#undef inlasm_ppc_isync
#undef inlasm_ppc_release
#undef inlasm_ppc_acquire
#undef inlasm_ppc_fence
#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync
#undef inlasm_release
#undef inlasm_acquire
#undef inlasm_fence
#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP

@@ -67,7 +67,7 @@ address os::current_stack_pointer() {
address csp;
#if !defined(USE_XLC_BUILTINS)
// inline assembly for `ppc_mr regno(csp), PPC_SP':
// inline assembly for `mr regno(csp), R1_SP':
__asm__ __volatile__ ("mr %0, 1":"=r"(csp):);
#else
csp = (address) __builtin_frame_address(0);
@@ -263,7 +263,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
goto report_and_die;
} else {
// this means a segv happened inside our stack, but not in
// This means a segv happened inside our stack, but not in
// the guarded zone. I'd like to know when this happens,
tty->print_raw_cr("SIGSEGV happened inside stack but outside yellow and red zone.");
goto report_and_die;
@@ -312,53 +312,57 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}
// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
goto run_stub;
}
else if (sig == SIGSEGV && os::is_poll_address(addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
goto run_stub;
}
// SIGTRAP-based ic miss check in compiled code
// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
goto run_stub;
}
#ifdef COMPILER2
// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}
#endif
// SIGSEGV-based implicit null check in compiled code.
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -366,8 +370,9 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}

@@ -58,35 +58,35 @@ inline jlong Atomic::load(volatile jlong* src) { return *src; }
- sync two-way memory barrier, aka fence
- lwsync orders Store|Store,
Load|Store,
Load|Load,
but not Store|Load
- eieio orders memory accesses for device memory (only)
- isync invalidates speculatively executed instructions
From the POWER ISA 2.06 documentation:
"[...] an isync instruction prevents the execution of
instructions following the isync until instructions
preceding the isync have completed, [...]"
From IBM's AIX assembler reference:
"The isync [...] instructions causes the processor to
refetch any instructions that might have been fetched
prior to the isync instruction. The instruction isync
causes the processor to wait for all previous instructions
to complete. Then any instructions already fetched are
discarded and instruction processing continues in the
environment established by the previous instructions."
semantic barrier instructions:
(as defined in orderAccess.hpp)
- release orders Store|Store, (maps to lwsync)
Load|Store
- acquire orders Load|Store, (maps to lwsync)
Load|Load
- fence orders Store|Store, (maps to sync)
Load|Store,
Load|Load,
Store|Load
*/
#define strasm_sync "\n sync \n"
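// Illustrative sketch (not part of this change; the macro and function names
// are invented for the example): the remaining barriers follow the same
// string-macro pattern as strasm_sync above and can be pasted straight into
// inline-assembly templates, e.g. an lwsync-based release barrier:
#define example_strasm_lwsync "\n  lwsync  \n"

static inline void example_release_barrier() {
  // Orders Store|Store and Load|Store ahead of whatever store follows.
  __asm__ __volatile__ (example_strasm_lwsync : : : "memory");
}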

@@ -40,26 +40,26 @@
//
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//
#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");
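// Illustrative sketch (assumption; the file defines its own companions to
// inlasm_sync(), and the names below are invented for the example): lwsync and
// isync wrappers built the same way, onto which the semantic barriers described
// above would map.
#define example_inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define example_inlasm_isync()  __asm__ __volatile__ ("isync"  : : : "memory");
// release/acquire -> lwsync (Store|Store, Load|Store / Load|Store, Load|Load),
// fence           -> sync   (all four orderings).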

@@ -284,16 +284,18 @@ JVM_handle_linux_signal(int sig,
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}
// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
}
@@ -304,24 +306,27 @@ JVM_handle_linux_signal(int sig,
// (address)info->si_addr == os::get_standard_polling_page()
// doesn't work for us. We use:
((NativeInstruction*)pc)->is_safepoint_poll()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
}
// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
}
// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -329,8 +334,9 @@ JVM_handle_linux_signal(int sig,
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -338,8 +344,9 @@ JVM_handle_linux_signal(int sig,
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
#endif