Jesper Wilhelmsson 2016-12-08 15:49:29 +01:00
commit f09c55c0d8
223 changed files with 3109 additions and 1504 deletions

View File

@ -1,51 +0,0 @@
#
# Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Oracle designates this
# particular file as subject to the "Classpath" exception as provided
# by Oracle in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
# This must be the first rule
default: all
include $(SPEC)
include MakeBase.gmk
VARIANT_TARGETS := $(foreach v, $(JVM_VARIANTS), variant-$v)
VARIANT_GENSRC_TARGETS := $(addsuffix -gensrc, $(VARIANT_TARGETS))
VARIANT_LIBS_TARGETS := $(addsuffix -libs, $(VARIANT_TARGETS))
$(VARIANT_GENSRC_TARGETS): variant-%-gensrc:
$(call LogWarn, Building JVM variant '$*' with features '$(JVM_FEATURES_$*)')
+$(MAKE) -f gensrc/GenerateSources.gmk JVM_VARIANT=$*
$(VARIANT_LIBS_TARGETS): variant-%-libs: variant-%-gensrc
+$(MAKE) -f lib/CompileLibraries.gmk JVM_VARIANT=$*
$(VARIANT_TARGETS): variant-%: variant-%-gensrc variant-%-libs
jsig:
+$(MAKE) -f lib/CompileLibjsig.gmk
all: $(VARIANT_TARGETS) jsig
.PHONY: $(VARIANT_TARGETS) $(VARIANT_GENSRC_TARGETS) $(VARIANT_LIBS_TARGETS) \
jsig all

View File

@ -107,6 +107,7 @@ $(eval $(call SetupNativeCompilation, BUILD_GTEST_LAUNCHER, \
LDFLAGS := $(LDFLAGS_JDKEXE), \
LDFLAGS_unix := -L$(JVM_OUTPUTDIR)/gtest $(call SET_SHARED_LIBRARY_ORIGIN), \
LDFLAGS_solaris := -library=stlport4, \
LIBS_linux := $(LIBCXX), \
LIBS_unix := -ljvm, \
LIBS_windows := $(JVM_OUTPUTDIR)/gtest/objs/jvm.lib, \
COPY_DEBUG_SYMBOLS := $(GTEST_COPY_DEBUG_SYMBOLS), \

View File

@ -48,6 +48,12 @@ ifneq ($(OPENJDK_TARGET_OS), windows)
LIBJSIG_CPU_FLAGS := -m64
else ifeq ($(OPENJDK_TARGET_CPU), x86)
LIBJSIG_CPU_FLAGS := -m32 -march=i586
else ifeq ($(OPENJDK_TARGET_CPU), ppc64)
LIBJSIG_CPU_FLAGS := -mcpu=powerpc64 -mtune=power5
else ifeq ($(OPENJDK_TARGET_CPU), ppc64le)
LIBJSIG_CPU_FLAGS := -DABI_ELFv2 -mcpu=power8 -mtune=power8
else ifeq ($(OPENJDK_TARGET_CPU), s390x)
LIBJSIG_CPU_FLAGS := -mbackchain -march=z10
endif
else ifeq ($(OPENJDK_TARGET_OS), solaris)

View File

@ -53,7 +53,6 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC := \
$(HOTSPOT_TOPDIR)/test/runtime/BoolReturn \
$(HOTSPOT_TOPDIR)/test/compiler/floatingpoint/ \
$(HOTSPOT_TOPDIR)/test/compiler/calls \
$(HOTSPOT_TOPDIR)/test/compiler/native \
$(HOTSPOT_TOPDIR)/test/serviceability/jvmti/GetNamedModule \
$(HOTSPOT_TOPDIR)/test/serviceability/jvmti/AddModuleReads \
$(HOTSPOT_TOPDIR)/test/serviceability/jvmti/AddModuleExportsAndOpens \
@ -97,7 +96,7 @@ ifeq ($(OPENJDK_TARGET_OS), linux)
BUILD_HOTSPOT_JTREG_LIBRARIES_LDFLAGS_libtest-rwx := -z execstack
BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exeinvoke := -ljvm -lpthread
BUILD_TEST_invoke_exeinvoke.c_OPTIMIZATION := NONE
BUILD_HOTSPOT_JTREG_EXECUTABLES_LDFLAGS_exeFPRegs := -ldl
BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exeFPRegs := -ldl
endif
ifeq ($(OPENJDK_TARGET_OS), windows)

View File

@ -9646,6 +9646,10 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN
// ---------------------------------------------------------------------
// BEGIN This section of the file is automatically generated. Do not edit --------------
// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
@ -9656,10 +9660,11 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN
// This section is generated from aarch64_ad_cas.m4
instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9673,10 +9678,10 @@ instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9690,10 +9695,10 @@ instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9705,10 +9710,10 @@ instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9720,10 +9725,10 @@ instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9735,10 +9740,10 @@ instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -9853,6 +9858,8 @@ instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
%}
ins_pipe(pipe_slow);
%}
// END This section of the file is automatically generated. Do not edit --------------
// ---------------------------------------------------------------------
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{

View File

@ -848,7 +848,7 @@ public:
// architecture. In debug mode we shrink it in order to test
// trampolines, but not so small that branches in the interpreter
// are out of range.
static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
static const unsigned long branch_range = INCLUDE_JVMCI ? 128 * M : NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
static bool reachable_from_branch_at(address branch, address target) {
return uabs(target - branch) < branch_range;
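
The comment above explains the trade-off: product builds use the full ±128 MB reach of an AArch64 unconditional branch, while debug builds shrink the range to 2 MB so trampoline stubs actually get exercised; with JVMCI built in, the range now stays at 128 MB even in debug builds. A minimal Java sketch of the reachability predicate, with constants mirroring the C++ values above (names are illustrative, not HotSpot API):

// Hedged sketch of reachable_from_branch_at(); constants mirror the C++ values above.
public class BranchRange {
    static final long M = 1024 * 1024;
    static final long BRANCH_RANGE = 128 * M;   // 2 * M in a non-JVMCI debug build

    static boolean reachableFromBranchAt(long branch, long target) {
        return Math.abs(target - branch) < BRANCH_RANGE;
    }

    public static void main(String[] args) {
        System.out.println(reachableFromBranchAt(0x1000L, 0x1000L + 64 * M));  // true: direct branch
        System.out.println(reachableFromBranchAt(0x1000L, 0x1000L + 256 * M)); // false: needs a trampoline
    }
}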

View File

@ -2249,6 +2249,25 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ cbz(dst, *stub->entry());
}
// If the compiler was not able to prove that the exact type of the source or the destination
// of the arraycopy is an array type, check at runtime if the source or the destination is
// an instance type.
if (flags & LIR_OpArrayCopy::type_check) {
if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) {
__ load_klass(tmp, dst);
__ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset())));
__ cmpw(rscratch1, Klass::_lh_neutral_value);
__ br(Assembler::GE, *stub->entry());
}
if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) {
__ load_klass(tmp, src);
__ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset())));
__ cmpw(rscratch1, Klass::_lh_neutral_value);
__ br(Assembler::GE, *stub->entry());
}
}
// check if negative
if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
__ cmpw(src_pos, 0);
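
The runtime check added above relies on the sign convention of Klass::layout_helper(): array klasses encode a negative value while instance klasses sit at or above Klass::_lh_neutral_value (zero), so the GE branch sends non-arrays to the slow stub. A hedged, stand-alone sketch of that convention (constants illustrative):

// Illustrative only: mirrors the layout-helper sign check behind the GE branch above.
public class LayoutHelperCheck {
    static final int LH_NEUTRAL_VALUE = 0;

    // Array klasses store a negative layout helper; instance klasses store a value >= 0.
    static boolean isDefinitelyArray(int layoutHelper) {
        return layoutHelper < LH_NEUTRAL_VALUE;
    }

    public static void main(String[] args) {
        System.out.println(isDefinitelyArray(-1));  // array-style encoding -> fast path
        System.out.println(isDefinitelyArray(16));  // instance size in bytes -> take the stub
    }
}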

View File

@ -1,3 +1,31 @@
dnl Copyright (c) 2016, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it
dnl under the terms of the GNU General Public License version 2 only, as
dnl published by the Free Software Foundation.
dnl
dnl This code is distributed in the hope that it will be useful, but WITHOUT
dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl version 2 for more details (a copy is included in the LICENSE file that
dnl accompanied this code).
dnl
dnl You should have received a copy of the GNU General Public License version
dnl 2 along with this work; if not, write to the Free Software Foundation,
dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
dnl
dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
dnl
dnl Process this file with m4 cas.m4 to generate the CAE and wCAS
dnl instructions used in aarch64.ad.
dnl
// BEGIN This section of the file is automatically generated. Do not edit --------------
// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
@ -5,13 +33,16 @@
// can't check the type of memory ordering here, so we always emit a
// STLXR.
// This section is generated from aarch64_ad_cas.m4
define(`CAS_INSN',
`
instruct compareAndExchange$1$5(iReg$2_R0 res, indirect mem, iReg$2_R2 oldval, iReg$2_R3 newval, rFlagsReg cr) %{
instruct compareAndExchange$1$5(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($5,Acq,' predicate(needs_acquiring_load_exclusive(n));
ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`)
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -24,11 +55,11 @@ instruct compareAndExchange$1$5(iReg$2_R0 res, indirect mem, iReg$2_R2 oldval, i
%}')dnl
define(`CAS_INSN4',
`
instruct compareAndExchange$1$7(iReg$2_R0 res, indirect mem, iReg$2_R2 oldval, iReg$2_R3 newval, rFlagsReg cr) %{
instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($7,Acq,' predicate(needs_acquiring_load_exclusive(n));
ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`)
effect(KILL cr);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
%}
@ -107,3 +138,5 @@ dnl CAS_INSN3(L,L,long,xword,Acq)
dnl CAS_INSN3(N,N,narrow oop,word,Acq)
dnl CAS_INSN3(P,P,ptr,xword,Acq)
dnl
// END This section of the file is automatically generated. Do not edit --------------
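
At the Java level, the instructs generated from this m4 file back VarHandle compare-and-exchange and weak compare-and-set operations (among other paths); the comment above explains why the AArch64 code always emits a releasing store-exclusive regardless of the requested ordering. A small, hedged illustration of the corresponding Java-level operations, not HotSpot code:

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;

// Minimal sketch of the Java-level operations the CAE/weak-CAS instructs above implement.
public class CasDemo {
    static volatile int value = 0;
    static final VarHandle VALUE;
    static {
        try {
            VALUE = MethodHandles.lookup().findStaticVarHandle(CasDemo.class, "value", int.class);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static void main(String[] args) {
        // compareAndExchange returns the witnessed value (CompareAndExchangeI at the IR level).
        int witnessed = (int) VALUE.compareAndExchange(0, 42);
        // weakCompareAndSetPlain may fail spuriously and only reports success or failure.
        boolean swapped = VALUE.weakCompareAndSetPlain(42, 7);
        System.out.println(witnessed + " " + swapped);
    }
}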

View File

@ -407,10 +407,8 @@ void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register
// JVMTI events, such as single-stepping, are implemented partly by avoiding running
// compiled code in threads for which the event is enabled. Check here for
// interp_only_mode if these events CAN be enabled.
// interp_only is an int, on little endian it is sufficient to test the byte only
// Is a cmpl faster?
ldr(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
cbz(rscratch1, run_compiled_code);
ldrw(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
cbzw(rscratch1, run_compiled_code);
ldr(rscratch1, Address(method, Method::interpreter_entry_offset()));
br(rscratch1);
bind(run_compiled_code);

View File

@ -41,28 +41,34 @@ jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Hand
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
address pc = _instructions->start() + pc_offset;
#ifdef ASSERT
{
NativeInstruction *insn = nativeInstruction_at(pc);
if (HotSpotObjectConstantImpl::compressed(constant)) {
// Mov narrow constant: movz n << 16, movk
assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
nativeInstruction_at(pc+4)->is_movk(), "wrong insn in patch");
} else {
// Move wide constant: movz n, movk, movk.
assert(nativeInstruction_at(pc+4)->is_movk()
&& nativeInstruction_at(pc+8)->is_movk(), "wrong insn in patch");
}
}
#endif // ASSERT
Handle obj = HotSpotObjectConstantImpl::object(constant);
jobject value = JNIHandles::make_local(obj());
if (HotSpotObjectConstantImpl::compressed(constant)) {
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec, 1);
Unimplemented();
} else {
NativeMovConstReg* move = nativeMovConstReg_at(pc);
move->set_data((intptr_t) value);
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec);
}
MacroAssembler::patch_oop(pc, (address)obj());
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec);
}
void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
address pc = _instructions->start() + pc_offset;
if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK);
MacroAssembler::patch_narrow_klass(pc, narrowOop);
TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop);
Unimplemented();
} else {
NativeMovConstReg* move = nativeMovConstReg_at(pc);
void* reference = record_metadata_reference(_instructions, pc, constant, CHECK);
@ -167,8 +173,8 @@ VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
if (jvmci_reg < RegisterImpl::number_of_registers) {
return as_Register(jvmci_reg)->as_VMReg();
} else {
jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers_for_jvmci;
if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
return as_FloatRegister(floatRegisterNumber)->as_VMReg();
}
JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);

View File

@ -185,6 +185,19 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
return instructions * NativeInstruction::instruction_size;
}
int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
// Metadata pointers are either narrow (32 bits) or wide (48 bits).
// We encode narrow ones by setting the upper 16 bits in the first
// instruction.
NativeInstruction *insn = nativeInstruction_at(insn_addr);
assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
return 2 * NativeInstruction::instruction_size;
}
address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
long offset = 0;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
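
patch_narrow_klass above splits a 32-bit narrow Klass pointer across a movz/movk pair: the upper 16 bits land in the first instruction and the lower 16 bits in the second. A hedged Java sketch of that split and its reassembly (value illustrative):

// Sketch of the 16-bit hi/lo split described in the patch_narrow_klass comment above.
public class NarrowKlassSplit {
    public static void main(String[] args) {
        int narrowKlass = 0x00ABCDEF;      // example 32-bit narrow Klass value
        int hi16 = narrowKlass >>> 16;      // goes into the movz immediate
        int lo16 = narrowKlass & 0xFFFF;    // goes into the following movk immediate
        int reassembled = (hi16 << 16) | lo16;
        System.out.printf("hi=0x%04X lo=0x%04X ok=%b%n", hi16, lo16, reassembled == narrowKlass);
    }
}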

View File

@ -590,6 +590,7 @@ public:
#endif
static int patch_oop(address insn_addr, address o);
static int patch_narrow_klass(address insn_addr, narrowKlass n);
address emit_trampoline_stub(int insts_call_instruction_offset, address target);

View File

@ -42,8 +42,9 @@ inline Register as_Register(int encoding) {
class RegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
number_of_byte_registers = 32
number_of_registers = 32,
number_of_byte_registers = 32,
number_of_registers_for_jvmci = 34 // Including SP and ZR.
};
// derived registers, offsets, and addresses
@ -103,6 +104,10 @@ CONSTANT_REGISTER_DECLARATION(Register, r28, (28));
CONSTANT_REGISTER_DECLARATION(Register, r29, (29));
CONSTANT_REGISTER_DECLARATION(Register, r30, (30));
// r31 is not a general purpose register, but represents either the
// stack pointer or the zero/discard register depending on the
// instruction.
CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31));
CONSTANT_REGISTER_DECLARATION(Register, zr, (32));
CONSTANT_REGISTER_DECLARATION(Register, sp, (33));
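
With number_of_registers_for_jvmci = 34 (r0..r30, r31_sp, zr and sp), the CodeInstaller change earlier in this commit subtracts 34 instead of 32 before indexing the float registers and rejects negative results. A hedged sketch of the resulting mapping in plain Java (names illustrative):

// Mirrors the jvmci_reg -> HotSpot register mapping from the CodeInstaller hunk above.
public class JvmciRegisterMapping {
    static final int NUMBER_OF_GENERAL_REGISTERS = 32;
    static final int NUMBER_OF_REGISTERS_FOR_JVMCI = 34;  // includes r31_sp and sp/zr
    static final int NUMBER_OF_FLOAT_REGISTERS = 32;

    static String map(int jvmciReg) {
        if (jvmciReg < NUMBER_OF_GENERAL_REGISTERS) {
            return "r" + jvmciReg;
        }
        int floatRegisterNumber = jvmciReg - NUMBER_OF_REGISTERS_FOR_JVMCI;
        if (floatRegisterNumber >= 0 && floatRegisterNumber < NUMBER_OF_FLOAT_REGISTERS) {
            return "v" + floatRegisterNumber;
        }
        return "invalid register number: " + jvmciReg;
    }

    public static void main(String[] args) {
        System.out.println(map(0));   // r0
        System.out.println(map(34));  // v0: first float register after the SP and ZR slots
        System.out.println(map(33));  // invalid: 32 and 33 fall through to the error path
    }
}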

View File

@ -2388,6 +2388,7 @@ void SharedRuntime::generate_deopt_blob() {
__ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
__ mov(c_rarg0, rthread);
__ movw(c_rarg2, rcpool); // exec mode
__ lea(rscratch1,
RuntimeAddress(CAST_FROM_FN_PTR(address,
Deoptimization::uncommon_trap)));

View File

@ -2743,7 +2743,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish;
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@ -2757,8 +2757,7 @@ class StubGenerator: public StubCodeGenerator {
__ enter();
__ subsw(rscratch2, len_reg, zr);
__ br(Assembler::LE, _L_finish);
__ movw(rscratch2, len_reg);
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@ -2823,7 +2822,6 @@ class StubGenerator: public StubCodeGenerator {
__ st1(v0, __ T16B, rvec);
__ BIND(_L_finish);
__ mov(r0, rscratch2);
__ leave();
@ -2849,7 +2847,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish;
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@ -2863,8 +2861,7 @@ class StubGenerator: public StubCodeGenerator {
__ enter();
__ subsw(rscratch2, len_reg, zr);
__ br(Assembler::LE, _L_finish);
__ movw(rscratch2, len_reg);
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@ -2933,7 +2930,6 @@ class StubGenerator: public StubCodeGenerator {
__ st1(v2, __ T16B, rvec);
__ BIND(_L_finish);
__ mov(r0, rscratch2);
__ leave();

View File

@ -203,6 +203,9 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ mov(sp, r13);
generate_transcendental_entry(kind, 2);
break;
case Interpreter::java_lang_math_fmaD :
case Interpreter::java_lang_math_fmaF :
return NULL;
default:
;
}
@ -883,7 +886,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
// and so we don't need to call the G1 pre-barrier. Thus we can use the
// regular method entry code to generate the NPE.
//
// This code is based on generate_accessor_enty.
// This code is based on generate_accessor_entry.
//
// rmethod: Method*
// r13: senderSP must preserve for slow path, set SP to it on fast path
@ -901,11 +904,11 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
__ ldr(local_0, Address(esp, 0));
__ cbz(local_0, slow_path);
// Load the value of the referent field.
const Address field_address(local_0, referent_offset);
__ load_heap_oop(local_0, field_address);
__ mov(r19, r13); // Move senderSP to a callee-saved register
// Generate the G1 pre-barrier code to log the value of
// the referent field in an SATB buffer.
__ enter(); // g1_write may call runtime
@ -917,7 +920,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
true /* expand_call */);
__ leave();
// areturn
__ andr(sp, r13, -16); // done with stack
__ andr(sp, r19, -16); // done with stack
__ ret(lr);
// generate a vanilla interpreter entry as the slow path

View File

@ -9580,6 +9580,19 @@ instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
ins_pipe(pipe_class_default);
%}
// Left shifted Immediate And
instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2, flagsRegCR0 cr0) %{
match(Set dst (AndI src1 src2));
effect(KILL cr0);
format %{ "ANDIS $dst, $src1, $src2.hi" %}
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_andis_);
__ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
%}
ins_pipe(pipe_class_default);
%}
// Immediate And
instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
match(Set dst (AndI src1 src2));
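
The new andI_reg_immIhi16 rule above ("Left shifted Immediate And") emits a single ANDIS using only the high halfword of the constant. A hedged Java sketch of the immediate extraction performed in its encode block (value illustrative):

// Sketch of the ANDIS immediate extraction used in the encode block above.
public class AndisImmediate {
    public static void main(String[] args) {
        int src2 = 0xFFFF0000;                      // constant whose significant bits are in the high halfword
        int uimm16 = (src2 & 0xFFFF0000) >>> 16;    // 16-bit immediate handed to ANDIS
        int result = 0x12345678 & (uimm16 << 16);   // what ANDIS computes for src1 = 0x12345678
        System.out.printf("uimm16=0x%04X result=0x%08X%n", uimm16, result);
    }
}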

View File

@ -1075,8 +1075,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type,
{
if (UseCompressedOops && !wide) {
Register compressed_src = Z_R14;
__ z_lgr(compressed_src, from->as_register());
__ encode_heap_oop(compressed_src);
__ oop_encoder(compressed_src, from->as_register(), true, (disp_reg != Z_R1) ? Z_R1 : Z_R0, -1, true);
offset = code_offset();
if (short_disp) {
__ z_st(compressed_src, disp_value, disp_reg, dest);

View File

@ -156,7 +156,7 @@ void frame::patch_pc(Thread* thread, address pc) {
}
own_abi()->return_pc = (uint64_t)pc;
_cb = CodeCache::find_blob(pc);
address original_pc = nmethod::get_deopt_original_pc(this);
address original_pc = CompiledMethod::get_deopt_original_pc(this);
if (original_pc != NULL) {
assert(original_pc == _pc, "expected original to be stored before patching");
_deopt_state = is_deoptimized;

View File

@ -39,7 +39,7 @@ inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
_fp = (intptr_t *) own_abi()->callers_sp;
address original_pc = nmethod::get_deopt_original_pc(this);
address original_pc = CompiledMethod::get_deopt_original_pc(this);
if (original_pc != NULL) {
_pc = original_pc;
_deopt_state = is_deoptimized;

View File

@ -92,9 +92,6 @@ define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
product(bool, ReoptimizeCallSequences, true, \
"Reoptimize code-sequences of calls at runtime.") \
\
product(bool, UseCountLeadingZerosInstruction, true, \
"Use count leading zeros instruction.") \
\
product(bool, UseByteReverseInstruction, true, \
"Use byte reverse instruction.") \
\

View File

@ -574,6 +574,7 @@ class MacroAssembler: public Assembler {
static int call_far_patchable_ret_addr_offset() { return call_far_patchable_size(); }
static bool call_far_patchable_requires_alignment_nop(address pc) {
if (!os::is_MP()) return false;
int size = call_far_patchable_size();
return ((intptr_t)(pc + size) & 0x03L) != 0;
}

View File

@ -256,11 +256,7 @@ void NativeFarCall::verify() {
address NativeFarCall::destination() {
assert(MacroAssembler::is_call_far_patchable_at((address)this), "unexpected call type");
address ctable = NULL;
if (MacroAssembler::call_far_patchable_requires_alignment_nop((address)this)) {
return MacroAssembler::get_dest_of_call_far_patchable_at(((address)this)+MacroAssembler::nop_size(), ctable);
} else {
return MacroAssembler::get_dest_of_call_far_patchable_at((address)this, ctable);
}
return MacroAssembler::get_dest_of_call_far_patchable_at((address)this, ctable);
}
@ -610,20 +606,20 @@ void NativeMovRegMem::verify() {
unsigned long inst1;
Assembler::get_instruction(l2, &inst1);
if (!Assembler::is_z_lb(inst1) &&
!Assembler::is_z_llgh(inst1) &&
!Assembler::is_z_lh(inst1) &&
!Assembler::is_z_l(inst1) &&
!Assembler::is_z_llgf(inst1) &&
!Assembler::is_z_lg(inst1) &&
!Assembler::is_z_le(inst1) &&
!Assembler::is_z_ld(inst1) &&
!Assembler::is_z_stc(inst1) &&
!Assembler::is_z_sth(inst1) &&
!Assembler::is_z_st(inst1) &&
!(Assembler::is_z_lgr(inst1) && UseCompressedOops) &&
!Assembler::is_z_stg(inst1) &&
!Assembler::is_z_ste(inst1) &&
if (!Assembler::is_z_lb(inst1) &&
!Assembler::is_z_llgh(inst1) &&
!Assembler::is_z_lh(inst1) &&
!Assembler::is_z_l(inst1) &&
!Assembler::is_z_llgf(inst1) &&
!Assembler::is_z_lg(inst1) &&
!Assembler::is_z_le(inst1) &&
!Assembler::is_z_ld(inst1) &&
!Assembler::is_z_stc(inst1) &&
!Assembler::is_z_sth(inst1) &&
!Assembler::is_z_st(inst1) &&
!UseCompressedOops &&
!Assembler::is_z_stg(inst1) &&
!Assembler::is_z_ste(inst1) &&
!Assembler::is_z_std(inst1)) {
tty->cr();
tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT

View File

@ -102,11 +102,8 @@ address Relocation::pd_call_destination(address orig_addr) {
if (orig_addr == NULL) {
call = nativeFarCall_at(inst_addr);
} else {
if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) {
call = nativeFarCall_at(orig_addr);
} else {
call = nativeFarCall_at(orig_addr); // must access location (in CP) where destination is stored in unmoved code, because load from CP is pc-relative
}
// must access location (in CP) where destination is stored in unmoved code, because load from CP is pc-relative
call = nativeFarCall_at(orig_addr);
}
return call->destination();
}

View File

@ -1489,8 +1489,8 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_CountLeadingZerosL:
case Op_CountTrailingZerosI:
case Op_CountTrailingZerosL:
// Implementation requires FLOGR instruction.
return UseCountLeadingZerosInstruction;
// Implementation requires FLOGR instruction, which is available since z9.
return true;
case Op_ReverseBytesI:
case Op_ReverseBytesL:
@ -9897,7 +9897,6 @@ instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2
// String IndexOfChar
instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
predicate(CompactStrings);
match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
ins_cost(200);
@ -10590,7 +10589,6 @@ instruct bytes_reverse_long(iRegL dst, iRegL src) %{
instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
match(Set dst (CountLeadingZerosI src));
effect(KILL tmp, KILL cr);
predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
ins_cost(3 * DEFAULT_COST);
size(14);
format %{ "SLLG $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
@ -10629,7 +10627,6 @@ instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr)
instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
match(Set dst (CountLeadingZerosL src));
effect(KILL tmp, KILL cr);
predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
ins_cost(DEFAULT_COST);
size(4);
format %{ "FLOGR $dst,$src \t# count leading zeros (long)\n\t" %}
@ -10655,7 +10652,6 @@ instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr)
instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
match(Set dst (CountTrailingZerosI src));
effect(TEMP_DEF dst, TEMP tmp, KILL cr);
predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
ins_cost(8 * DEFAULT_COST);
// TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
format %{ "LLGFR $dst,$src \t# clear upper 32 bits (we are dealing with int)\n\t"
@ -10709,7 +10705,6 @@ instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr
instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
match(Set dst (CountTrailingZerosL src));
effect(TEMP_DEF dst, KILL tmp, KILL cr);
predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
ins_cost(8 * DEFAULT_COST);
// TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
format %{ "LCGR $dst,$src \t# preserve src\n\t"

View File

@ -3831,17 +3831,17 @@ void TemplateTable::newarray() {
// Call runtime.
__ z_llgc(Z_ARG2, at_bcp(1)); // type
// size in Z_tos
__ z_lgfr(Z_ARG3, Z_tos); // size
call_VM(Z_RET,
CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
Z_ARG2, Z_tos);
Z_ARG2, Z_ARG3);
}
void TemplateTable::anewarray() {
transition(itos, atos);
__ get_2_byte_integer_at_bcp(Z_ARG3, 1, InterpreterMacroAssembler::Unsigned);
__ get_constant_pool(Z_ARG2);
__ z_llgfr(Z_ARG4, Z_tos);
__ z_lgfr(Z_ARG4, Z_tos);
call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
Z_ARG2, Z_ARG3, Z_ARG4);
}

View File

@ -271,6 +271,31 @@ void VM_Version::set_features_string() {
tty->print_cr(" oldest detected generation is %s", _features_string);
_features_string = "z/Architecture (ambiguous detection)";
}
if (has_Crypto_AES()) {
char buf[256];
assert(strlen(_features_string) + 4 + 3*4 + 1 < sizeof(buf), "increase buffer size");
jio_snprintf(buf, sizeof(buf), "%s aes%s%s%s", // String 'aes' must be surrounded by spaces so that jtreg tests recognize it.
_features_string,
has_Crypto_AES128() ? " 128" : "",
has_Crypto_AES192() ? " 192" : "",
has_Crypto_AES256() ? " 256" : "");
_features_string = os::strdup(buf);
}
if (has_Crypto_SHA()) {
char buf[256];
assert(strlen(_features_string) + 4 + 2 + 2*4 + 6 + 1 < sizeof(buf), "increase buffer size");
// String 'sha1' etc must be surrounded by spaces so that jtreg tests recognize it.
jio_snprintf(buf, sizeof(buf), "%s %s%s%s%s",
_features_string,
has_Crypto_SHA1() ? " sha1" : "",
has_Crypto_SHA256() ? " sha256" : "",
has_Crypto_SHA512() ? " sha512" : "",
has_Crypto_GHASH() ? " ghash" : "");
if (has_Crypto_AES()) { os::free((void *)_features_string); }
_features_string = os::strdup(buf);
}
}
// featureBuffer - bit array indicating availability of various features
@ -369,7 +394,7 @@ void VM_Version::print_features_internal(const char* text, bool print_anyway) {
if (has_Crypto()) {
tty->cr();
tty->print_cr("detailled availability of %s capabilities:", "CryptoFacility");
tty->print_cr("detailed availability of %s capabilities:", "CryptoFacility");
if (test_feature_bit(&_cipher_features[0], -1, 2*Cipher::_featureBits)) {
tty->cr();
tty->print_cr(" available: %s", "Message Cipher Functions");
@ -479,7 +504,6 @@ void VM_Version::reset_features(bool reset) {
}
}
void VM_Version::set_features_z900(bool reset) {
reset_features(reset);

View File

@ -351,7 +351,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
}
} else if (UseCRC32Intrinsics) {
warning("SPARC CRC32 intrinsics require VIS3 insructions support. Intriniscs will be disabled");
warning("SPARC CRC32 intrinsics require VIS3 instructions support. Intrinsics will be disabled");
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}

View File

@ -4285,8 +4285,7 @@ void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
emit_int8((unsigned char)0xCC);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8((unsigned char)imm8);
@ -4294,24 +4293,21 @@ void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xC8);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xC9);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xCA);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -4319,24 +4315,21 @@ void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
// xmm0 is implicit additional source to this instruction.
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xCB);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xCC);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sha(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
emit_int8((unsigned char)0xCD);
emit_int8((unsigned char)(0xC0 | encode));
}

View File

@ -10773,16 +10773,13 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
// save length for return
push(len);
// 8165287: EVEX version disabled for now, needs to be refactored as
// it is returning incorrect results.
if ((UseAVX > 2) && // AVX512
0 &&
VM_Version::supports_avx512vlbw() &&
VM_Version::supports_bmi2()) {
set_vector_masking(); // opening of the stub context for programming mask registers
Label copy_32_loop, copy_loop_tail, copy_just_portion_of_candidates;
Label copy_32_loop, copy_loop_tail, restore_k1_return_zero;
// alignement
Label post_alignement;
@ -10797,16 +10794,16 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
movl(result, 0x00FF);
evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
testl(len, -64);
jcc(Assembler::zero, post_alignement);
// Save k1
kmovql(k3, k1);
testl(len, -64);
jcc(Assembler::zero, post_alignement);
movl(tmp5, dst);
andl(tmp5, (64 - 1));
andl(tmp5, (32 - 1));
negl(tmp5);
andl(tmp5, (64 - 1));
andl(tmp5, (32 - 1));
// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
@ -10816,13 +10813,12 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(k1, result);
evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k1);
jcc(Assembler::carryClear, copy_just_portion_of_candidates);
jcc(Assembler::carryClear, restore_k1_return_zero);
evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
@ -10835,7 +10831,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
// end of alignement
movl(tmp5, len);
andl(tmp5, (32 - 1)); // tail count (in chars)
andl(tmp5, (32 - 1)); // tail count (in chars)
andl(len, ~(32 - 1)); // vector count (in chars)
jcc(Assembler::zero, copy_loop_tail);
@ -10847,7 +10843,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, copy_just_portion_of_candidates);
jcc(Assembler::carryClear, restore_k1_return_zero);
// All elements in current processed chunk are valid candidates for
// compression. Write a truncated byte elements to the memory.
@ -10858,11 +10854,10 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
bind(copy_loop_tail);
// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
// Restore k1
kmovql(k1, k3);
jcc(Assembler::zero, return_length);
// Save k1
kmovql(k3, k1);
movl(len, tmp5);
// ~(~0 << len), where len is the # of remaining elements to process
@ -10875,30 +10870,16 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k1);
jcc(Assembler::carryClear, copy_just_portion_of_candidates);
jcc(Assembler::carryClear, restore_k1_return_zero);
evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
// Restore k1
kmovql(k1, k3);
jmp(return_length);
bind(copy_just_portion_of_candidates);
kmovdl(tmp5, k2);
tzcntl(tmp5, tmp5);
// ~(~0 << tmp5), where tmp5 is a number of elements in an array from the
// result to the first element larger than 0xFF
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(k1, result);
evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
bind(restore_k1_return_zero);
// Restore k1
kmovql(k1, k3);
jmp(return_zero);
clear_vector_masking(); // closing of the stub context for programming mask registers

View File

@ -3857,7 +3857,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {

View File

@ -5017,7 +5017,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {

View File

@ -342,6 +342,9 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
// [ hi(arg) ]
//
if (kind == Interpreter::java_lang_math_fmaD) {
if (!UseFMA) {
return NULL; // Generate a vanilla entry
}
__ movdbl(xmm2, Address(rsp, 5 * wordSize));
__ movdbl(xmm1, Address(rsp, 3 * wordSize));
__ movdbl(xmm0, Address(rsp, 1 * wordSize));
@ -352,6 +355,9 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
return entry_point;
} else if (kind == Interpreter::java_lang_math_fmaF) {
if (!UseFMA) {
return NULL; // Generate a vanilla entry
}
__ movflt(xmm2, Address(rsp, 3 * wordSize));
__ movflt(xmm1, Address(rsp, 2 * wordSize));
__ movflt(xmm0, Address(rsp, 1 * wordSize));

View File

@ -370,11 +370,17 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
//
if (kind == Interpreter::java_lang_math_fmaD) {
if (!UseFMA) {
return NULL; // Generate a vanilla entry
}
__ movdbl(xmm0, Address(rsp, wordSize));
__ movdbl(xmm1, Address(rsp, 3 * wordSize));
__ movdbl(xmm2, Address(rsp, 5 * wordSize));
__ fmad(xmm0, xmm1, xmm2, xmm0);
} else if (kind == Interpreter::java_lang_math_fmaF) {
if (!UseFMA) {
return NULL; // Generate a vanilla entry
}
__ movflt(xmm0, Address(rsp, wordSize));
__ movflt(xmm1, Address(rsp, 2 * wordSize));
__ movflt(xmm2, Address(rsp, 3 * wordSize));
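
All of the fma entry points above fall back to a vanilla interpreter entry when UseFMA is off; when it is on, they evaluate java.lang.Math.fma with a single rounding, which is the point of the fused instruction. A short illustration of that single-rounding semantics:

// Math.fma computes a*b + c with one rounding step, unlike the separate multiply and add.
public class FmaDemo {
    public static void main(String[] args) {
        double a = Math.nextUp(1.0);           // 1 + 2^-52
        double b = Math.nextUp(1.0);
        double c = -(1.0 + 2 * Math.ulp(1.0)); // -(1 + 2^-51)
        System.out.println(a * b + c);         // 0.0: a*b was rounded before the add
        System.out.println(Math.fma(a, b, c)); // ~4.9e-32 (2^-104), preserved by the single rounding
    }
}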

View File

@ -65,6 +65,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const int CPU_FAMILY_SHIFT = 8;
const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
@ -358,36 +359,39 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0xE0);
__ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
// EVEX setup: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
UseSSE = 2;
// If UseAVX is uninitialized or is set by the user to include EVEX
if (use_evex) {
// EVEX setup: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
UseSSE = 2;
#ifdef _WINDOWS
// xmm5-xmm15 are not preserved by caller on windows
// https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
// xmm5-xmm15 are not preserved by caller on windows
// https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
__ subptr(rsp, 64);
__ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS
// load value into all 64 bytes of zmm7 register
__ movl(rcx, VM_Version::ymm_test_value());
__ movdl(xmm0, rcx);
__ movl(rcx, 0xffff);
__ kmovwl(k1, rcx);
__ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
// load value into all 64 bytes of zmm7 register
__ movl(rcx, VM_Version::ymm_test_value());
__ movdl(xmm0, rcx);
__ movl(rcx, 0xffff);
__ kmovwl(k1, rcx);
__ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
__ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
VM_Version::clean_cpuFeatures();
__ jmp(save_restore_except);
VM_Version::clean_cpuFeatures();
__ jmp(save_restore_except);
}
__ bind(legacy_setup);
// AVX setup
@ -441,32 +445,35 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0xE0);
__ jccb(Assembler::notEqual, legacy_save_restore);
// EVEX check: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
UseSSE = 2;
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
__ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
// If UseAVX is uninitialized or is set by the user to include EVEX
if (use_evex) {
// EVEX check: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
UseSSE = 2;
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
__ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
__ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif
#ifdef _WINDOWS
#ifdef _LP64
__ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
__ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
__ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
__ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
#endif // _LP64
__ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
__ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
__ addptr(rsp, 64);
#endif // _WINDOWS
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
UseSSE = saved_usesse;
__ jmp(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
UseSSE = saved_usesse;
__ jmp(wrapup);
}
__ bind(legacy_save_restore);
// AVX check

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -697,14 +697,8 @@ err:
}
/**local function **/
bool exists(const char *fname)
{
int fd;
if ((fd = open(fname, O_RDONLY)) > 0) {
close(fd);
return true;
}
return false;
bool exists(const char *fname) {
return access(fname, F_OK) == 0;
}
// we check: 1. lib

View File

@ -1957,7 +1957,7 @@ public class CommandProcessor {
if (doit == null) {
out.println("Unrecognized command. Try help...");
} else if (!debugger.isAttached() && !doit.okIfDisconnected) {
out.println("Command not valid until the attached to a VM");
out.println("Command not valid until attached to a VM");
} else {
try {
doit.doit(args);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1474,7 +1474,7 @@ public class HSDB implements ObjectHistogramPanel.Listener, SAListener {
return attached;
}
public void attach(String pid) {
attach(pid);
HSDB.this.attach(pid);
}
public void attach(String java, String core) {
}

View File

@ -975,7 +975,7 @@ public class InstanceKlass extends Klass {
while (l <= h) {
int mid = (l + h) >> 1;
Method m = methods.at(mid);
int res = m.getName().fastCompare(name);
long res = m.getName().fastCompare(name);
if (res == 0) {
// found matching name; do linear search to find matching signature
// first, quick check for common case

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -139,8 +139,8 @@ public class Symbol extends VMObject {
time-invariant order Since Symbol* are in C_HEAP, their
relative order in memory never changes, so use address
comparison for speed. */
public int fastCompare(Symbol other) {
return (int) addr.minus(other.addr);
public long fastCompare(Symbol other) {
return addr.minus(other.addr);
}
private static String readModifiedUTF8(byte[] buf) throws IOException {
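
Returning an int from fastCompare truncated the 64-bit address difference, so two Symbols whose addresses differ by an exact multiple of 2^32 could compare as equal and confuse the binary search in InstanceKlass, which now keeps the result in a long as well. A hedged sketch of the truncation:

// Demonstrates why the address-difference comparison needs to stay in 64 bits.
public class FastCompareTruncation {
    public static void main(String[] args) {
        long addrA = 0x7F00_0000_1000L;
        long addrB = addrA + (1L << 32);  // 4 GB apart, plausible within a large C heap
        long diff = addrB - addrA;
        System.out.println((int) diff);   // 0 -> the old int result would treat the Symbols as equal
        System.out.println(diff);         // 4294967296 -> the long result orders them correctly
    }
}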

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -51,7 +51,7 @@ public class Bytes {
if (!swap)
return x;
return (swapShort((short) x) << 16) | (swapShort((short) (x >> 16)) & 0xFFFF);
return ((int)swapShort((short) x) << 16) | (swapShort((short) (x >> 16)) & 0xFFFF);
}
/** Should only swap if the hardware's underlying byte order is
@ -60,6 +60,6 @@ public class Bytes {
if (!swap)
return x;
return (swapInt((int) x) << 32) | (swapInt((int) (x >> 32)) & 0xFFFFFFFF);
return ((long)swapInt((int) x) << 32) | (swapInt((int) (x >> 32)) & 0xFFFFFFFF);
}
}
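
The casts added above matter because Java masks shift counts by the operand width: shifting an int left by 32 shifts by zero, so the old swapLong left the byte-swapped low word in the low half instead of moving it to the high half. A small, self-contained illustration:

// Java masks shift counts: for an int, "<< 32" shifts by 0, so the cast to long is essential.
public class ShiftMaskDemo {
    public static void main(String[] args) {
        int swappedLowWord = 0x12345678;
        long wrong = swappedLowWord << 32;         // still 0x12345678: the shift count was masked to 0
        long right = (long) swappedLowWord << 32;  // 0x1234567800000000 as intended
        System.out.printf("wrong=0x%016X right=0x%016X%n", wrong, right);
    }
}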

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -177,14 +177,14 @@ public class JSJavaHeap extends DefaultScriptObject {
JSJavaObject k = jk.getJSJavaClass();
JSJavaObject l = factory.newJSJavaObject(loader);
if (k != null) {
if (k != null) {
try {
finalFunc.call(new Object[] { k, l });
} catch (ScriptException exp) {
throw new RuntimeException(exp);
if (l != null) {
try {
finalFunc.call(new Object[] { k, l });
} catch (ScriptException exp) {
throw new RuntimeException(exp);
}
}
}
}
}
}
});

View File

@ -84,6 +84,10 @@ public class AArch64 extends Architecture {
public static final Register lr = r30;
// Used by runtime code: cannot be compiler-allocated.
public static final Register rscratch1 = r8;
public static final Register rscratch2 = r9;
// @formatter:off
public static final RegisterArray cpuRegisters = new RegisterArray(
r0, r1, r2, r3, r4, r5, r6, r7,

View File

@ -25,18 +25,19 @@ package jdk.vm.ci.hotspot.aarch64;
import static jdk.vm.ci.aarch64.AArch64.lr;
import static jdk.vm.ci.aarch64.AArch64.r0;
import static jdk.vm.ci.aarch64.AArch64.r1;
import static jdk.vm.ci.aarch64.AArch64.r12;
import static jdk.vm.ci.aarch64.AArch64.r2;
import static jdk.vm.ci.aarch64.AArch64.r27;
import static jdk.vm.ci.aarch64.AArch64.r28;
import static jdk.vm.ci.aarch64.AArch64.r29;
import static jdk.vm.ci.aarch64.AArch64.r3;
import static jdk.vm.ci.aarch64.AArch64.r31;
import static jdk.vm.ci.aarch64.AArch64.r4;
import static jdk.vm.ci.aarch64.AArch64.r5;
import static jdk.vm.ci.aarch64.AArch64.r6;
import static jdk.vm.ci.aarch64.AArch64.r7;
import static jdk.vm.ci.aarch64.AArch64.r9;
import static jdk.vm.ci.aarch64.AArch64.rscratch1;
import static jdk.vm.ci.aarch64.AArch64.rscratch2;
import static jdk.vm.ci.aarch64.AArch64.r12;
import static jdk.vm.ci.aarch64.AArch64.r27;
import static jdk.vm.ci.aarch64.AArch64.r28;
import static jdk.vm.ci.aarch64.AArch64.r29;
import static jdk.vm.ci.aarch64.AArch64.r31;
import static jdk.vm.ci.aarch64.AArch64.sp;
import static jdk.vm.ci.aarch64.AArch64.v0;
import static jdk.vm.ci.aarch64.AArch64.v1;
@ -114,7 +115,7 @@ public class AArch64HotSpotRegisterConfig implements RegisterConfig {
private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(r0, r1, r2, r3, r4, r5, r6, r7);
private final RegisterArray simdParameterRegisters = new RegisterArray(v0, v1, v2, v3, v4, v5, v6, v7);
public static final Register inlineCacheRegister = r9;
public static final Register inlineCacheRegister = rscratch2;
/**
* Vtable stubs expect the metaspace Method in r12.
@ -125,7 +126,8 @@ public class AArch64HotSpotRegisterConfig implements RegisterConfig {
public static final Register threadRegister = r28;
public static final Register fp = r29;
private static final RegisterArray reservedRegisters = new RegisterArray(threadRegister, fp, lr, r31, zr, sp);
private static final RegisterArray reservedRegisters
= new RegisterArray(rscratch1, rscratch2, threadRegister, fp, lr, r31, zr, sp);
private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) {
RegisterArray allRegisters = arch.getAvailableValueRegisters();

View File

@ -472,7 +472,8 @@ final class HotSpotResolvedJavaMethodImpl extends HotSpotMethod implements HotSp
Parameter[] res = new Parameter[javaParameters.length];
for (int i = 0; i < res.length; i++) {
java.lang.reflect.Parameter src = javaParameters[i];
res[i] = new Parameter(src.getName(), src.getModifiers(), this, i);
String paramName = src.isNamePresent() ? src.getName() : null;
res[i] = new Parameter(paramName, src.getModifiers(), this, i);
}
return res;
}

View File

@ -177,7 +177,7 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
/**
* A {@code Parameter} provides information about method parameters.
*/
public static class Parameter implements AnnotatedElement {
class Parameter implements AnnotatedElement {
private final String name;
private final ResolvedJavaMethod method;
private final int modifiers;
@ -186,7 +186,9 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
/**
* Constructor for {@code Parameter}.
*
* @param name the name of the parameter
* @param name the name of the parameter or {@code null} if there is no
* {@literal MethodParameters} class file attribute providing a non-empty name
* for the parameter
* @param modifiers the modifier flags for the parameter
* @param method the method which defines this parameter
* @param index the index of the parameter
@ -195,6 +197,7 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
int modifiers,
ResolvedJavaMethod method,
int index) {
assert name == null || !name.isEmpty();
this.name = name;
this.modifiers = modifiers;
this.method = method;
@ -202,10 +205,20 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
}
/**
* Gets the name of the parameter.
* Gets the name of the parameter. If the parameter's name is {@linkplain #isNamePresent()
* present}, then this method returns the name provided by the class file. Otherwise, this
* method synthesizes a name of the form argN, where N is the index of the parameter in the
* descriptor of the method which declares the parameter.
*
* @return the name of the parameter, either provided by the class file or synthesized if
* the class file does not provide a name
*/
public String getName() {
return name;
if (name == null) {
return "arg" + index;
} else {
return name;
}
}
/**
@ -216,7 +229,7 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
}
/**
* Get the modifier flags for the parameter
* Get the modifier flags for the parameter.
*/
public int getModifiers() {
return modifiers;
@ -243,6 +256,16 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP
return method.getSignature().getParameterType(index, method.getDeclaringClass());
}
/**
* Determines if the parameter has a name according to a {@literal MethodParameters} class
* file attribute.
*
* @return true if and only if the parameter has a name according to the class file.
*/
public boolean isNamePresent() {
return name != null;
}
/**
* Determines if the parameter represents a variable argument list.
*/
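As a side note, the present-or-synthesized naming policy documented for getName() can be observed through the standard java.lang.reflect API as well; the demo class below is a minimal, hypothetical sketch (the class name and helper are made up, not part of this change):

import java.lang.reflect.Method;
import java.lang.reflect.Parameter;

public class ParameterNameDemo {
    // Mirrors the getName() contract documented above: use the class file name
    // when a MethodParameters attribute provides one, otherwise synthesize argN.
    static String nameOf(Parameter p, int index) {
        return p.isNamePresent() ? p.getName() : "arg" + index;
    }

    public static void main(String[] args) throws Exception {
        Method m = String.class.getMethod("substring", int.class, int.class);
        Parameter[] params = m.getParameters();
        for (int i = 0; i < params.length; i++) {
            // Without -parameters at compile time this typically prints arg0, arg1.
            System.out.println(i + " -> " + nameOf(params[i], i));
        }
    }
}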

View File

@ -171,6 +171,8 @@ frame os::get_sender_for_C_frame(frame* fr) {
}
frame os::current_frame() {
// Expected to return the stack pointer of this method.
// But if inlined, returns the stack pointer of our caller!
intptr_t* csp = (intptr_t*) *((intptr_t*) os::current_stack_pointer());
assert (csp != NULL, "sp should not be NULL");
// Pass a dummy pc. This way we don't have to load it from the
@ -184,8 +186,13 @@ frame os::current_frame() {
assert(senderFrame.pc() != NULL, "Sender pc should not be NULL");
// Return sender of sender of current topframe which hopefully
// both have pc != NULL.
#ifdef _NMT_NOINLINE_ // Is set in slowdebug builds.
// Current_stack_pointer is not inlined, we must pop one more frame.
frame tmp = os::get_sender_for_C_frame(&topframe);
return os::get_sender_for_C_frame(&tmp);
#else
return os::get_sender_for_C_frame(&topframe);
#endif
}
}
@ -374,7 +381,7 @@ JVM_handle_linux_signal(int sig,
// BugId 4454115: A read from a MappedByteBuffer can fault here if the
// underlying file has been truncated. Do not crash the VM in such a case.
CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL;
CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
if (nm != NULL && nm->has_unsafe_access()) {
// We don't really need a stub here! Just set the pending exception and
// continue at the next instruction after the faulting read. Returning

View File

@ -1813,14 +1813,10 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
ciKlass* holder = stream()->get_declared_method_holder();
const Bytecodes::Code bc_raw = stream()->cur_bc_raw();
assert(declared_signature != NULL, "cannot be null");
assert(will_link == target->is_loaded(), "");
ciInstanceKlass* klass = target->holder();
// Make sure there are no evident problems with linking the instruction.
bool is_resolved = true;
if (klass->is_loaded() && !target->is_loaded()) {
is_resolved = false; // method not found
}
assert(!target->is_loaded() || klass->is_loaded(), "loaded target must imply loaded klass");
// check if CHA possible: if so, change the code to invoke_special
ciInstanceKlass* calling_klass = method()->holder();
@ -1868,7 +1864,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
ciMethod* cha_monomorphic_target = NULL;
ciMethod* exact_target = NULL;
Value better_receiver = NULL;
if (UseCHA && DeoptC1 && klass->is_loaded() && target->is_loaded() &&
if (UseCHA && DeoptC1 && target->is_loaded() &&
!(// %%% FIXME: Are both of these relevant?
target->is_method_handle_intrinsic() ||
target->is_compiled_lambda_form()) &&
@ -1988,8 +1984,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
}
// check if we could do inlining
if (!PatchALot && Inline && is_resolved &&
klass->is_loaded() && target->is_loaded() &&
if (!PatchALot && Inline && target->is_loaded() &&
(klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized())
&& !patch_for_appendix) {
// callee is known => check if we have static binding
@ -2032,7 +2027,6 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
CHECK_BAILOUT();
// inlining not successful => standard invoke
bool is_loaded = target->is_loaded();
ValueType* result_type = as_ValueType(declared_signature->return_type());
ValueStack* state_before = copy_state_exhandling();
@ -2049,7 +2043,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
// Currently only supported on Sparc.
// The UseInlineCaches only controls dispatch to invokevirtuals for
// loaded classes which we weren't able to statically bind.
if (!UseInlineCaches && is_resolved && is_loaded && code == Bytecodes::_invokevirtual
if (!UseInlineCaches && target->is_loaded() && code == Bytecodes::_invokevirtual
&& !target->can_be_statically_bound()) {
// Find a vtable index if one is available
// For arrays, callee_holder is Object. Resolving the call with
@ -2062,16 +2056,24 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
}
#endif
if (is_resolved) {
// invokespecial always needs a NULL check. invokevirtual where the target is
// final or where it's not known whether the target is final requires a NULL check.
// Otherwise normal invokevirtual will perform the null check during the lookup
// logic or the unverified entry point. Profiling of calls requires that
// the null check is performed in all cases.
bool do_null_check = (recv != NULL) &&
(code == Bytecodes::_invokespecial || !is_loaded || target->is_final() || (is_profiling() && profile_calls()));
// A null check is required here (when there is a receiver) for any of the following cases:
// - invokespecial, which always needs a null check.
// - invokevirtual, when the target is final and loaded. Calls to final targets will become optimized
// and require null checking. If the target is loaded, a null check is emitted here.
// If the target isn't loaded, the null check must happen after the call resolution. We achieve that
// by using the target method's unverified entry point (see CompiledIC::compute_monomorphic_entry).
// (The JVM specification requires that a LinkageError must be thrown before an NPE. An unloaded target may
// potentially fail, and can't have the null check before the resolution.)
// - A call that will be profiled. (But we can't add a null check when the target is unloaded, for the same
// reason as above, so calls with a receiver to unloaded targets can't be profiled.)
//
// Normal invokevirtual will perform the null check during lookup
if (do_null_check) {
bool need_null_check = (code == Bytecodes::_invokespecial) ||
(target->is_loaded() && (target->is_final_method() || (is_profiling() && profile_calls())));
if (need_null_check) {
if (recv != NULL) {
null_check(recv);
}
@ -2090,9 +2092,6 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
profile_call(target, recv, target_klass, collect_args_for_profiling(args, NULL, false), false);
}
}
} else {
// No need in null check or profiling: linkage error will be thrown at runtime
// during resolution.
}
Invoke* result = new Invoke(code, result_type, recv, args, vtable_index, target, state_before);
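Read as a predicate, the receiver null-check decision spelled out in the comment above reduces to a small boolean function; the Java sketch below is a hypothetical paraphrase of that condition (type and method names are made up), not code from this change:

enum Bc { INVOKESPECIAL, INVOKEVIRTUAL, INVOKEINTERFACE }

final class ReceiverNullCheckRule {
    // invokespecial always requires the check; otherwise only a loaded target
    // that is final (or whose calls are being profiled) is checked here.
    static boolean needNullCheck(Bc code, boolean targetLoaded,
                                 boolean targetFinal, boolean profileCalls) {
        return code == Bc.INVOKESPECIAL
            || (targetLoaded && (targetFinal || profileCalls));
    }
}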

View File

@ -2976,7 +2976,6 @@ void LIRGenerator::do_Invoke(Invoke* x) {
}
// emit invoke code
bool optimized = x->target_is_loaded() && x->target_is_final();
assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
// JSR 292
@ -3001,9 +3000,9 @@ void LIRGenerator::do_Invoke(Invoke* x) {
case Bytecodes::_invokespecial:
case Bytecodes::_invokevirtual:
case Bytecodes::_invokeinterface:
// for final target we still produce an inline cache, in order
// to be able to call mixed mode
if (x->code() == Bytecodes::_invokespecial || optimized) {
// for loaded and final (method or class) target we still produce an inline cache,
// in order to be able to call mixed mode
if (x->code() == Bytecodes::_invokespecial || x->target_is_final()) {
__ call_opt_virtual(target, receiver, result_register,
SharedRuntime::get_resolve_opt_virtual_call_stub(),
arg_list, info);

View File

@ -4349,13 +4349,34 @@ static void check_super_class_access(const InstanceKlass* this_klass, TRAPS) {
assert(this_klass != NULL, "invariant");
const Klass* const super = this_klass->super();
if (super != NULL) {
// If the loader is not the boot loader then throw an exception if its
// superclass is in package jdk.internal.reflect and its loader is not a
// special reflection class loader
if (!this_klass->class_loader_data()->is_the_null_class_loader_data()) {
assert(super->is_instance_klass(), "super is not instance klass");
PackageEntry* super_package = super->package();
if (super_package != NULL &&
super_package->name()->fast_compare(vmSymbols::jdk_internal_reflect()) == 0 &&
!java_lang_ClassLoader::is_reflection_class_loader(this_klass->class_loader())) {
ResourceMark rm(THREAD);
Exceptions::fthrow(
THREAD_AND_LOCATION,
vmSymbols::java_lang_IllegalAccessError(),
"class %s loaded by %s cannot access jdk/internal/reflect superclass %s",
this_klass->external_name(),
this_klass->class_loader_data()->loader_name(),
super->external_name());
return;
}
}
Reflection::VerifyClassAccessResults vca_result =
Reflection::verify_class_access(this_klass, super, false);
if (vca_result != Reflection::ACCESS_OK) {
ResourceMark rm(THREAD);
char* msg = Reflection::verify_class_access_msg(this_klass, super, vca_result);
if (msg == NULL) {
ResourceMark rm(THREAD);
Exceptions::fthrow(
THREAD_AND_LOCATION,
vmSymbols::java_lang_IllegalAccessError(),

View File

@ -171,11 +171,11 @@ void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table
void CompactSymbolTableWriter::add(unsigned int hash, Symbol *symbol) {
address base_address = address(MetaspaceShared::shared_rs()->base());
uintx max_delta = uintx(MetaspaceShared::shared_rs()->size());
assert(max_delta <= MAX_SHARED_DELTA, "range check");
uintx deltax = address(symbol) - base_address;
assert(deltax < max_delta, "range check");
// The symbols are in RO space, which is smaller than MAX_SHARED_DELTA.
// The assert below is just to be extra cautious.
assert(deltax <= MAX_SHARED_DELTA, "the delta is too large to encode");
u4 delta = u4(deltax);
CompactHashtableWriter::add(hash, delta);

View File

@ -3525,17 +3525,24 @@ bool java_lang_ClassLoader::is_trusted_loader(oop loader) {
return false;
}
oop java_lang_ClassLoader::non_reflection_class_loader(oop loader) {
// Return true if this is one of the class loaders associated with
// the generated bytecodes for reflection.
bool java_lang_ClassLoader::is_reflection_class_loader(oop loader) {
if (loader != NULL) {
// See whether this is one of the class loaders associated with
// the generated bytecodes for reflection, and if so, "magically"
// delegate to its parent to prevent class loading from occurring
// in places where applications using reflection didn't expect it.
Klass* delegating_cl_class = SystemDictionary::reflect_DelegatingClassLoader_klass();
// This might be null in non-1.4 JDKs
if (delegating_cl_class != NULL && loader->is_a(delegating_cl_class)) {
return parent(loader);
}
return (delegating_cl_class != NULL && loader->is_a(delegating_cl_class));
}
return false;
}
oop java_lang_ClassLoader::non_reflection_class_loader(oop loader) {
// See whether this is one of the class loaders associated with
// the generated bytecodes for reflection, and if so, "magically"
// delegate to its parent to prevent class loading from occurring
// in places where applications using reflection didn't expect it.
if (is_reflection_class_loader(loader)) {
return parent(loader);
}
return loader;
}

View File

@ -1243,6 +1243,10 @@ class java_lang_ClassLoader : AllStatic {
static bool is_trusted_loader(oop loader);
// Return true if this is one of the class loaders associated with
// the generated bytecodes for reflection.
static bool is_reflection_class_loader(oop loader);
// Fix for 4474172
static oop non_reflection_class_loader(oop loader);

View File

@ -94,7 +94,7 @@ typedef void (*JImageClose_t)(JImageFile* jimage);
* Ex.
* const char* package = (*JImagePackageToModule)(image, "java/lang");
* tty->print_cr(package);
* > java.base
* -> java.base
*/
extern "C" const char * JIMAGE_PackageToModule(JImageFile* jimage, const char* package_name);
@ -126,7 +126,7 @@ typedef JImageLocationRef(*JImageFindResource_t)(JImageFile* jimage,
/*
* JImageGetResource - Given an open image file (see JImageOpen), a resources
* JImageGetResource - Given an open image file (see JImageOpen), a resource's
* location information (see JImageFindResource), a buffer of appropriate
* size and the size, retrieve the bytes associated with the
* resource. If the size is less than the resource size then the read is truncated.
@ -158,7 +158,7 @@ typedef jlong(*JImageGetResource_t)(JImageFile* jimage, JImageLocationRef locati
* Ex.
* bool ctw_visitor(JImageFile* jimage, const char* module_name, const char* version,
* const char* package, const char* name, const char* extension, void* arg) {
* if (strcmp(extension, class) == 0) {
* if (strcmp(extension, "class") == 0) {
* char path[JIMAGE_MAX_PATH];
* Thread* THREAD = Thread::current();
* jio_snprintf(path, JIMAGE_MAX_PATH - 1, "/%s/%s", package, name);

View File

@ -54,6 +54,17 @@ void ModuleEntry::set_location(Symbol* location) {
}
}
bool ModuleEntry::is_non_jdk_module() {
ResourceMark rm;
if (location() != NULL) {
const char* loc = location()->as_C_string();
if (strncmp(loc, "jrt:/java.", 10) != 0 && strncmp(loc, "jrt:/jdk.", 9) != 0) {
return true;
}
}
return false;
}
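The location test above is just a prefix check; a hypothetical Java rendering of the same rule, for illustration only (the class and method names are invented):

final class NonJdkModuleCheck {
    // A module counts as non-JDK only when it has a location and that location
    // starts with neither "jrt:/java." nor "jrt:/jdk.".
    static boolean isNonJdkModule(String location) {
        if (location == null) {
            return false;
        }
        return !location.startsWith("jrt:/java.") && !location.startsWith("jrt:/jdk.");
    }
}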
void ModuleEntry::set_version(Symbol* version) {
if (_version != NULL) {
// _version symbol's refcounts are managed by ModuleEntry,

View File

@ -38,6 +38,7 @@
#define UNNAMED_MODULE "Unnamed Module"
#define JAVAPKG "java/"
#define JAVAPKG_LEN 5
#define JAVA_BASE_NAME "java.base"
class ModuleClosure;
@ -102,6 +103,7 @@ public:
Symbol* location() const { return _location; }
void set_location(Symbol* location);
bool is_non_jdk_module();
bool can_read(ModuleEntry* m) const;
bool has_reads() const;

View File

@ -2897,11 +2897,11 @@ void SystemDictionary::verify() {
// caller needs ResourceMark
const char* SystemDictionary::loader_name(const oop loader) {
return ((loader) == NULL ? "<bootloader>" :
InstanceKlass::cast((loader)->klass())->name()->as_C_string());
InstanceKlass::cast((loader)->klass())->name()->as_C_string());
}
// caller needs ResourceMark
const char* SystemDictionary::loader_name(const ClassLoaderData* loader_data) {
return (loader_data->class_loader() == NULL ? "<bootloader>" :
InstanceKlass::cast((loader_data->class_loader())->klass())->name()->as_C_string());
SystemDictionary::loader_name(loader_data->class_loader()));
}

View File

@ -228,6 +228,7 @@
\
/* Support for reflection based on dynamic bytecode generation (JDK 1.4 and above) */ \
\
template(jdk_internal_reflect, "jdk/internal/reflect") \
template(reflect_MagicAccessorImpl, "jdk/internal/reflect/MagicAccessorImpl") \
template(reflect_MethodAccessorImpl, "jdk/internal/reflect/MethodAccessorImpl") \
template(reflect_ConstructorAccessorImpl, "jdk/internal/reflect/ConstructorAccessorImpl") \

View File

@ -159,7 +159,8 @@ public:
bool blob_contains(address addr) const { return header_begin() <= addr && addr < data_end(); }
bool code_contains(address addr) const { return code_begin() <= addr && addr < code_end(); }
bool contains(address addr) const { return content_begin() <= addr && addr < content_end(); }
bool is_frame_complete_at(address addr) const { return code_contains(addr) && addr >= code_begin() + _frame_complete_offset; }
bool is_frame_complete_at(address addr) const { return _frame_complete_offset != CodeOffsets::frame_never_safe &&
code_contains(addr) && addr >= code_begin() + _frame_complete_offset; }
// CodeCache support: really only used by the nmethods, but in order to get
// asserts and certain bookkeeping to work in the CodeCache they are defined

View File

@ -460,9 +460,11 @@ void CompiledIC::set_to_monomorphic(CompiledICInfo& info) {
}
// is_optimized: Compiler has generated an optimized call (i.e., no inline
// cache) static_bound: The call can be static bound (i.e, no need to use
// inline cache)
// is_optimized: Compiler has generated an optimized call (i.e. fixed, no inline cache)
// static_bound: The call can be static bound. If it isn't also optimized, the property
// wasn't provable at time of compilation. An optimized call will have any necessary
// null check, while a static_bound won't. A static_bound (but not optimized) must
// therefore use the unverified entry point.
void CompiledIC::compute_monomorphic_entry(const methodHandle& method,
KlassHandle receiver_klass,
bool is_optimized,
@ -475,7 +477,23 @@ void CompiledIC::compute_monomorphic_entry(const methodHandle& method,
if (method_code != NULL && method_code->is_in_use()) {
assert(method_code->is_compiled(), "must be compiled");
// Call to compiled code
if (static_bound || is_optimized) {
//
// Note: the following problem exists with Compiler1:
// - at compile time we may or may not know if the destination is final
// - if we know that the destination is final (is_optimized), we will emit
// an optimized virtual call (no inline cache), and need a Method* to make
// a call to the interpreter
// - if we don't know if the destination is final, we emit a standard
// virtual call, and use CompiledICHolder to call interpreted code
// (no static call stub has been generated)
// - In the case that we notice here that the call is static bound, we
// convert the call into what looks to be an optimized virtual call,
// but we must use the unverified entry point (since there will be no
// null check on a call when the target isn't loaded).
// This causes problems when verifying the IC because
// it looks vanilla but is optimized. Code in is_call_to_interpreted
// is aware of this and weakens its asserts.
if (is_optimized) {
entry = method_code->verified_entry_point();
} else {
entry = method_code->entry_point();
@ -485,38 +503,6 @@ void CompiledIC::compute_monomorphic_entry(const methodHandle& method,
// Call to compiled code
info.set_compiled_entry(entry, (static_bound || is_optimized) ? NULL : receiver_klass(), is_optimized);
} else {
// Note: the following problem exists with Compiler1:
// - at compile time we may or may not know if the destination is final
// - if we know that the destination is final, we will emit an optimized
// virtual call (no inline cache), and need a Method* to make a call
// to the interpreter
// - if we do not know if the destination is final, we emit a standard
// virtual call, and use CompiledICHolder to call interpreted code
// (no static call stub has been generated)
// However in that case we will now notice it is static_bound
// and convert the call into what looks to be an optimized
// virtual call. This causes problems in verifying the IC because
// it look vanilla but is optimized. Code in is_call_to_interpreted
// is aware of this and weakens its asserts.
// static_bound should imply is_optimized -- otherwise we have a
// performance bug (statically-bindable method is called via
// dynamically-dispatched call note: the reverse implication isn't
// necessarily true -- the call may have been optimized based on compiler
// analysis (static_bound is only based on "final" etc.)
#ifdef COMPILER2
#ifdef TIERED
#if defined(ASSERT)
// can't check the assert because we don't have the CompiledIC with which to
// find the address if the call instruction.
//
// CodeBlob* cb = find_blob_unsafe(instruction_address());
// assert(cb->is_compiled_by_c1() || !static_bound || is_optimized, "static_bound should imply is_optimized");
#endif // ASSERT
#else
assert(!static_bound || is_optimized, "static_bound should imply is_optimized");
#endif // TIERED
#endif // COMPILER2
if (is_optimized) {
// Use stub entry
info.set_interpreter_entry(method()->get_c2i_entry(), method());

View File

@ -64,7 +64,7 @@
cflags(TraceOptoOutput, bool, false, TraceOptoOutput) \
cflags(TraceSpilling, bool, TraceSpilling, TraceSpilling) \
cflags(Vectorize, bool, false, Vectorize) \
cflags(VectorizeDebug, bool, false, VectorizeDebug) \
cflags(VectorizeDebug, uintx, 0, VectorizeDebug) \
cflags(CloneMapDebug, bool, false, CloneMapDebug) \
cflags(DoReserveCopyInSuperWordDebug, bool, false, DoReserveCopyInSuperWordDebug) \
cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \
@ -140,6 +140,7 @@ public:
compilerdirectives_c1_flags(set_function_definition)
void print_intx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" INTX_FORMAT " ", n, v); } }
void print_uintx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" UINTX_FORMAT " ", n, v); } }
void print_bool(outputStream* st, ccstr n, bool v, bool mod) { if (mod) { st->print("%s:%s ", n, v ? "true" : "false"); } }
void print_double(outputStream* st, ccstr n, double v, bool mod) { if (mod) { st->print("%s:%f ", n, v); } }
void print_ccstr(outputStream* st, ccstr n, ccstr v, bool mod) { if (mod) { st->print("%s:%s ", n, v); } }

View File

@ -31,6 +31,7 @@
enum FlagType {
boolFlag,
intxFlag,
uintxFlag,
doubleFlag,
ccstrFlag,
ccstrlistFlag,
@ -40,6 +41,7 @@ enum FlagType {
static const char* flag_type_names[] = {
"bool",
"int",
"uint",
"double",
"string",
"string list",

View File

@ -156,9 +156,7 @@ void ConcurrentMarkThread::run_service() {
jlong mark_start = os::elapsed_counter();
log_info(gc, marking)("Concurrent Mark (%.3fs)", TimeHelper::counter_to_seconds(mark_start));
int iter = 0;
do {
iter++;
for (uint iter = 1; true; ++iter) {
if (!cm()->has_aborted()) {
G1ConcPhaseTimer t(_cm, "Concurrent Mark From Roots");
_cm->mark_from_roots();
@ -178,11 +176,14 @@ void ConcurrentMarkThread::run_service() {
VM_CGC_Operation op(&final_cl, "Pause Remark");
VMThread::execute(&op);
}
if (cm()->restart_for_overflow()) {
log_debug(gc, marking)("Restarting Concurrent Marking because of Mark Stack Overflow in Remark (Iteration #%d).", iter);
log_info(gc, marking)("Concurrent Mark Restart due to overflow");
if (!cm()->restart_for_overflow() || cm()->has_aborted()) {
break;
}
} while (cm()->restart_for_overflow());
log_info(gc, marking)("Concurrent Mark Restart due to overflow"
" (iteration #%u", iter);
}
if (!cm()->has_aborted()) {
G1ConcPhaseTimer t(_cm, "Concurrent Create Live Data");

View File

@ -227,7 +227,7 @@ HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_slow(HeapWord
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
if (obj->klass_or_null() == NULL) return q;
if (obj->klass_or_null_acquire() == NULL) return q;
n += block_size(q);
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");

View File

@ -136,7 +136,7 @@ inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_const(
while (n <= addr) {
q = n;
oop obj = oop(q);
if (obj->klass_or_null() == NULL) {
if (obj->klass_or_null_acquire() == NULL) {
return q;
}
n += block_size(q);
@ -148,7 +148,7 @@ inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_const(
inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr(HeapWord* q,
const void* addr) {
if (oop(q)->klass_or_null() == NULL) {
if (oop(q)->klass_or_null_acquire() == NULL) {
return q;
}
HeapWord* n = q + block_size(q);

View File

@ -300,6 +300,8 @@ G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first,
// thread to calculate the object size incorrectly.
Copy::fill_to_words(new_obj, oopDesc::header_size(), 0);
// Next, pad out the unused tail of the last region with filler
// objects, for improved usage accounting.
// How many words we use for filler objects.
size_t word_fill_size = word_size_sum - word_size;
@ -426,8 +428,7 @@ HeapWord* G1CollectedHeap::humongous_obj_allocate(size_t word_size, AllocationCo
log_debug(gc, ergo, heap)("Attempt heap expansion (humongous allocation request failed). Allocation request: " SIZE_FORMAT "B",
word_size * HeapWordSize);
_hrm.expand_at(first, obj_regions);
_hrm.expand_at(first, obj_regions, workers());
g1_policy()->record_new_heap_size(num_regions());
#ifdef ASSERT
@ -739,7 +740,7 @@ bool G1CollectedHeap::alloc_archive_regions(MemRegion* ranges, size_t count) {
// Perform the actual region allocation, exiting if it fails.
// Then note how much new space we have allocated.
if (!_hrm.allocate_containing_regions(curr_range, &commits)) {
if (!_hrm.allocate_containing_regions(curr_range, &commits, workers())) {
return false;
}
increase_used(word_size * HeapWordSize);

View File

@ -2009,10 +2009,10 @@ public:
{ }
void operator()(oop obj) const {
guarantee(obj->is_oop(),
guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
"Non-oop " PTR_FORMAT ", phase: %s, info: %d",
p2i(obj), _phase, _info);
guarantee(!_g1h->obj_in_cs(obj),
guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj),
"obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
p2i(obj), _phase, _info);
}
@ -2436,6 +2436,7 @@ bool G1CMTask::get_entries_from_global_stack() {
if (elem == NULL) {
break;
}
assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
bool success = _task_queue->push(elem);
// We only call this when the local queue is empty or under a
// given target limit. So, we do not expect this push to fail.
@ -2448,7 +2449,9 @@ bool G1CMTask::get_entries_from_global_stack() {
}
void G1CMTask::drain_local_queue(bool partially) {
if (has_aborted()) return;
if (has_aborted()) {
return;
}
// Decide what the target size is, depending whether we're going to
// drain it partially (so that other tasks can steal if they run out
@ -2464,12 +2467,7 @@ void G1CMTask::drain_local_queue(bool partially) {
oop obj;
bool ret = _task_queue->pop_local(obj);
while (ret) {
assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
assert(!_g1h->is_on_master_free_list(
_g1h->heap_region_containing((HeapWord*) obj)), "invariant");
scan_object(obj);
if (_task_queue->size() <= target_size || has_aborted()) {
ret = false;
} else {
@ -2880,8 +2878,6 @@ void G1CMTask::do_marking_step(double time_target_ms,
while (!has_aborted()) {
oop obj;
if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
"any stolen object should be marked");
scan_object(obj);
// And since we're towards the end, let's totally drain the
@ -3003,6 +2999,7 @@ G1CMTask::G1CMTask(uint worker_id,
G1CMTaskQueueSet* task_queues)
: _g1h(G1CollectedHeap::heap()),
_worker_id(worker_id), _cm(cm),
_objArray_processor(this),
_claimed(false),
_nextMarkBitMap(NULL), _hash_seed(17),
_task_queue(task_queue),

View File

@ -26,6 +26,7 @@
#define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
#include "classfile/javaClasses.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/taskqueue.hpp"
@ -706,11 +707,13 @@ private:
words_scanned_period = 12*1024,
// The regular clock call is called once the number of visited
// references reaches this limit
refs_reached_period = 384,
refs_reached_period = 1024,
// Initial value for the hash seed, used in the work stealing code
init_hash_seed = 17
};
G1CMObjArrayProcessor _objArray_processor;
uint _worker_id;
G1CollectedHeap* _g1h;
G1ConcurrentMark* _cm;
@ -826,8 +829,10 @@ private:
bool is_below_finger(oop obj, HeapWord* global_finger) const;
template<bool scan> void process_grey_object(oop obj);
public:
// Apply the closure on the given area of the objArray. Return the number of words
// scanned.
inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
// It resets the task; it should be called right at the beginning of
// a marking phase.
void reset(G1CMBitMap* _nextMarkBitMap);

View File

@ -27,6 +27,7 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentMark.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
@ -117,11 +118,11 @@ inline void G1CMTask::scan_object(oop obj) { process_grey_object<true>(obj); }
inline void G1CMTask::push(oop obj) {
HeapWord* objAddr = (HeapWord*) obj;
assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
assert(!_g1h->is_on_master_free_list(
assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant");
assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list(
_g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
assert(!_g1h->is_obj_ill(obj), "invariant");
assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant");
assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant");
if (!_task_queue->push(obj)) {
// The local task queue looks full. We need to push some entries
@ -169,17 +170,26 @@ inline bool G1CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
template<bool scan>
inline void G1CMTask::process_grey_object(oop obj) {
assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
size_t obj_size = obj->size();
_words_scanned += obj_size;
assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj),
"Any stolen object should be a slice or marked");
if (scan) {
obj->oop_iterate(_cm_oop_closure);
if (G1CMObjArrayProcessor::is_array_slice(obj)) {
_words_scanned += _objArray_processor.process_slice(obj);
} else if (G1CMObjArrayProcessor::should_be_sliced(obj)) {
_words_scanned += _objArray_processor.process_obj(obj);
} else {
_words_scanned += obj->oop_iterate_size(_cm_oop_closure);;
}
}
check_limits();
}
inline size_t G1CMTask::scan_objArray(objArrayOop obj, MemRegion mr) {
obj->oop_iterate(_cm_oop_closure, mr);
return mr.word_size();
}
inline void G1CMTask::make_reference_grey(oop obj) {
if (_cm->par_mark(obj)) {
// No OrderAccess:store_load() is needed. It is implicit in the

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) {
return oop((void*)((uintptr_t)addr | ArraySliceBit));
}
HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) {
assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value));
return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit);
}
void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) {
oop obj = encode_array_slice(what);
_task->push(obj);
}
size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) {
size_t words_to_scan = MIN2(remaining, ObjArrayMarkingStride);
if (remaining > ObjArrayMarkingStride) {
push_array_slice(start_from + ObjArrayMarkingStride);
}
// Then process current area.
MemRegion mr(start_from, words_to_scan);
return _task->scan_objArray(obj, mr);
}
size_t G1CMObjArrayProcessor::process_obj(oop obj) {
assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), (size_t)obj->size());
return process_array_slice(objArrayOop(obj), (HeapWord*)obj, (size_t)objArrayOop(obj)->size());
}
size_t G1CMObjArrayProcessor::process_slice(oop obj) {
HeapWord* const decoded_address = decode_array_slice(obj);
// Find the start address of the objArrayOop.
// Shortcut the BOT access if the given address is from a humongous object. The BOT
// slide is fast enough for "smaller" objects in non-humongous regions, but is slower
// than directly using heap region table.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
HeapRegion* r = g1h->heap_region_containing(decoded_address);
HeapWord* const start_address = r->is_humongous() ?
r->humongous_start_region()->bottom() :
g1h->block_start(decoded_address);
assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address));
assert(start_address < decoded_address,
"Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT,
p2i(start_address),
p2i(decoded_address));
objArrayOop objArray = objArrayOop(start_address);
size_t already_scanned = decoded_address - start_address;
size_t remaining = objArray->size() - already_scanned;
return process_array_slice(objArray, decoded_address, remaining);
}

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
#include "oops/oopsHierarchy.hpp"
#include "memory/allocation.hpp"
class G1CMTask;
// Helper class to mark through large objArrays during marking in an efficient way.
// Instead of pushing large object arrays, we push continuations onto the
// mark stack. These continuations are identified by having their LSB set.
// This allows incremental processing of large objects.
class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC {
private:
// The bit mask for the continuation indicator of elements on the mark stack.
static const size_t ArraySliceBit = 1;
// Reference to the task for doing the actual work.
G1CMTask* _task;
// Encodes the given address as a continuation "oop".
oop encode_array_slice(HeapWord* addr);
// Remove the continuation marker from the given oop from the mark stack.
HeapWord* decode_array_slice(oop value);
// Push the continuation at the given address onto the mark stack.
void push_array_slice(HeapWord* addr);
// Process (apply the closure) on the given continuation of the given objArray.
size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining);
public:
static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; }
static bool should_be_sliced(oop obj);
G1CMObjArrayProcessor(G1CMTask* task) : _task(task) {
}
// Process the given continuation "oop". Returns the number of words scanned.
size_t process_slice(oop obj);
// Start processing the given objArrayOop by scanning the header and pushing its
// continuation.
size_t process_obj(oop obj);
};
#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP */
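The encoding works because mark-stack entries are word-aligned addresses, so their least significant bit is always free; the Java model below is a hypothetical sketch of the tag/untag scheme described in the class comment (names and the sample address are made up):

final class SliceTagModel {
    static final long ARRAY_SLICE_BIT = 1L;

    // Tag a word-aligned address as an array-slice continuation.
    static long encodeSlice(long wordAlignedAddr) {
        return wordAlignedAddr | ARRAY_SLICE_BIT;
    }

    // An entry is a slice iff its low bit is set.
    static boolean isSlice(long value) {
        return (value & ARRAY_SLICE_BIT) != 0;
    }

    // Strip the tag to recover the address the slice starts at.
    static long decodeSlice(long value) {
        return value & ~ARRAY_SLICE_BIT;
    }

    public static void main(String[] args) {
        long addr = 0x7f00_0000_1000L;                    // word-aligned, low bit clear
        long tagged = encodeSlice(addr);
        System.out.println(isSlice(tagged));               // true
        System.out.println(decodeSlice(tagged) == addr);   // true
        System.out.println(isSlice(addr));                 // false: ordinary entry
    }
}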

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
#include "oops/oop.inline.hpp"
#include "oops/oopsHierarchy.hpp"
#include "runtime/globals.hpp"
inline bool G1CMObjArrayProcessor::should_be_sliced(oop obj) {
return obj->is_objArray() && ((size_t)((objArrayOop)obj)->size()) >= 2 * ObjArrayMarkingStride;
}
#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP */

View File

@ -36,7 +36,9 @@
static const char* Indents[5] = {"", " ", " ", " ", " "};
G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
_max_gc_threads(max_gc_threads)
_max_gc_threads(max_gc_threads),
_gc_start_counter(0),
_gc_pause_time_ms(0.0)
{
assert(max_gc_threads > 0, "Must have some GC threads");
@ -95,13 +97,40 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
_gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>(max_gc_threads, "Non-Young Free Collection Set (ms):");
_gc_par_phases[PreserveCMReferents] = new WorkerDataArray<double>(max_gc_threads, "Parallel Preserve CM Refs (ms):");
reset();
}
void G1GCPhaseTimes::note_gc_start() {
_gc_start_counter = os::elapsed_counter();
void G1GCPhaseTimes::reset() {
_cur_collection_par_time_ms = 0.0;
_cur_collection_code_root_fixup_time_ms = 0.0;
_cur_strong_code_root_purge_time_ms = 0.0;
_cur_evac_fail_recalc_used = 0.0;
_cur_evac_fail_restore_remsets = 0.0;
_cur_evac_fail_remove_self_forwards = 0.0;
_cur_string_dedup_fixup_time_ms = 0.0;
_cur_clear_ct_time_ms = 0.0;
_cur_expand_heap_time_ms = 0.0;
_cur_ref_proc_time_ms = 0.0;
_cur_ref_enq_time_ms = 0.0;
_cur_collection_start_sec = 0.0;
_root_region_scan_wait_time_ms = 0.0;
_external_accounted_time_ms = 0.0;
_recorded_clear_claimed_marks_time_ms = 0.0;
_recorded_young_cset_choice_time_ms = 0.0;
_recorded_non_young_cset_choice_time_ms = 0.0;
_recorded_redirty_logged_cards_time_ms = 0.0;
_recorded_preserve_cm_referents_time_ms = 0.0;
_recorded_merge_pss_time_ms = 0.0;
_recorded_total_free_cset_time_ms = 0.0;
_recorded_serial_free_cset_time_ms = 0.0;
_cur_fast_reclaim_humongous_time_ms = 0.0;
_cur_fast_reclaim_humongous_register_time_ms = 0.0;
_cur_fast_reclaim_humongous_total = 0;
_cur_fast_reclaim_humongous_candidates = 0;
_cur_fast_reclaim_humongous_reclaimed = 0;
_cur_verify_before_time_ms = 0.0;
_cur_verify_after_time_ms = 0.0;
for (int i = 0; i < GCParPhasesSentinel; i++) {
if (_gc_par_phases[i] != NULL) {
@ -110,6 +139,11 @@ void G1GCPhaseTimes::note_gc_start() {
}
}
void G1GCPhaseTimes::note_gc_start() {
_gc_start_counter = os::elapsed_counter();
reset();
}
#define ASSERT_PHASE_UNINITIALIZED(phase) \
assert(_gc_par_phases[phase]->get(i) == uninitialized, "Phase " #phase " reported for thread that was not started");
@ -184,7 +218,7 @@ size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
}
template <class T>
void G1GCPhaseTimes::details(T* phase, const char* indent) {
void G1GCPhaseTimes::details(T* phase, const char* indent) const {
Log(gc, phases, task) log;
if (log.is_level(LogLevel::Trace)) {
outputStream* trace_out = log.trace_stream();
@ -193,7 +227,7 @@ void G1GCPhaseTimes::details(T* phase, const char* indent) {
}
}
void G1GCPhaseTimes::log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum) {
void G1GCPhaseTimes::log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum) const {
out->print("%s", Indents[indent]);
phase->print_summary_on(out, print_sum);
details(phase, Indents[indent]);
@ -206,7 +240,7 @@ void G1GCPhaseTimes::log_phase(WorkerDataArray<double>* phase, uint indent, outp
}
}
void G1GCPhaseTimes::debug_phase(WorkerDataArray<double>* phase) {
void G1GCPhaseTimes::debug_phase(WorkerDataArray<double>* phase) const {
Log(gc, phases) log;
if (log.is_level(LogLevel::Debug)) {
ResourceMark rm;
@ -214,7 +248,7 @@ void G1GCPhaseTimes::debug_phase(WorkerDataArray<double>* phase) {
}
}
void G1GCPhaseTimes::trace_phase(WorkerDataArray<double>* phase, bool print_sum) {
void G1GCPhaseTimes::trace_phase(WorkerDataArray<double>* phase, bool print_sum) const {
Log(gc, phases) log;
if (log.is_level(LogLevel::Trace)) {
ResourceMark rm;
@ -222,37 +256,50 @@ void G1GCPhaseTimes::trace_phase(WorkerDataArray<double>* phase, bool print_sum)
}
}
#define PHASE_DOUBLE_FORMAT "%s%s: %.1lfms"
#define PHASE_SIZE_FORMAT "%s%s: " SIZE_FORMAT
#define TIME_FORMAT "%.1lfms"
#define info_line(str, value) \
log_info(gc, phases)(PHASE_DOUBLE_FORMAT, Indents[1], str, value);
void G1GCPhaseTimes::info_time(const char* name, double value) const {
log_info(gc, phases)("%s%s: " TIME_FORMAT, Indents[1], name, value);
}
#define debug_line(str, value) \
log_debug(gc, phases)(PHASE_DOUBLE_FORMAT, Indents[2], str, value);
void G1GCPhaseTimes::debug_time(const char* name, double value) const {
log_debug(gc, phases)("%s%s: " TIME_FORMAT, Indents[2], name, value);
}
#define trace_line(str, value) \
log_trace(gc, phases)(PHASE_DOUBLE_FORMAT, Indents[3], str, value);
void G1GCPhaseTimes::trace_time(const char* name, double value) const {
log_trace(gc, phases)("%s%s: " TIME_FORMAT, Indents[3], name, value);
}
#define trace_line_sz(str, value) \
log_trace(gc, phases)(PHASE_SIZE_FORMAT, Indents[3], str, value);
void G1GCPhaseTimes::trace_count(const char* name, size_t value) const {
log_trace(gc, phases)("%s%s: " SIZE_FORMAT, Indents[3], name, value);
}
#define trace_line_ms(str, value) \
log_trace(gc, phases)(PHASE_SIZE_FORMAT, Indents[3], str, value);
double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
const double sum_ms = _root_region_scan_wait_time_ms +
_recorded_young_cset_choice_time_ms +
_recorded_non_young_cset_choice_time_ms +
_cur_fast_reclaim_humongous_register_time_ms;
#define info_line_and_account(str, value) \
info_line(str, value); \
accounted_time_ms += value;
info_time("Pre Evacuate Collection Set", sum_ms);
void G1GCPhaseTimes::print() {
note_gc_end();
double accounted_time_ms = _external_accounted_time_ms;
if (_root_region_scan_wait_time_ms > 0.0) {
info_line_and_account("Root Region Scan Waiting", _root_region_scan_wait_time_ms);
debug_time("Root Region Scan Waiting", _root_region_scan_wait_time_ms);
}
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
if (G1EagerReclaimHumongousObjects) {
debug_time("Humongous Register", _cur_fast_reclaim_humongous_register_time_ms);
trace_count("Humongous Total", _cur_fast_reclaim_humongous_total);
trace_count("Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
}
info_line_and_account("Evacuate Collection Set", _cur_collection_par_time_ms);
return sum_ms;
}
double G1GCPhaseTimes::print_evacuate_collection_set() const {
const double sum_ms = _cur_collection_par_time_ms;
info_time("Evacuate Collection Set", sum_ms);
trace_phase(_gc_par_phases[GCWorkerStart], false);
debug_phase(_gc_par_phases[ExtRootScan]);
for (int i = ThreadRoots; i <= SATBFiltering; i++) {
@ -270,57 +317,98 @@ void G1GCPhaseTimes::print() {
debug_phase(_gc_par_phases[GCWorkerTotal]);
trace_phase(_gc_par_phases[GCWorkerEnd], false);
info_line_and_account("Code Roots", _cur_collection_code_root_fixup_time_ms + _cur_strong_code_root_purge_time_ms);
debug_line("Code Roots Fixup", _cur_collection_code_root_fixup_time_ms);
debug_line("Code Roots Purge", _cur_strong_code_root_purge_time_ms);
return sum_ms;
}
double G1GCPhaseTimes::print_post_evacuate_collection_set() const {
const double evac_fail_handling = _cur_evac_fail_recalc_used +
_cur_evac_fail_remove_self_forwards +
_cur_evac_fail_restore_remsets;
const double sum_ms = evac_fail_handling +
_cur_collection_code_root_fixup_time_ms +
_recorded_preserve_cm_referents_time_ms +
_cur_ref_proc_time_ms +
_cur_ref_enq_time_ms +
_cur_clear_ct_time_ms +
_recorded_merge_pss_time_ms +
_cur_strong_code_root_purge_time_ms +
_recorded_redirty_logged_cards_time_ms +
_recorded_clear_claimed_marks_time_ms +
_recorded_total_free_cset_time_ms +
_cur_fast_reclaim_humongous_time_ms +
_cur_expand_heap_time_ms +
_cur_string_dedup_fixup_time_ms;
info_time("Post Evacuate Collection Set", sum_ms);
debug_time("Code Roots Fixup", _cur_collection_code_root_fixup_time_ms);
debug_time("Preserve CM Refs", _recorded_preserve_cm_referents_time_ms);
trace_phase(_gc_par_phases[PreserveCMReferents]);
debug_time("Reference Processing", _cur_ref_proc_time_ms);
if (G1StringDedup::is_enabled()) {
info_line_and_account("String Dedup Fixup", _cur_string_dedup_fixup_time_ms);
debug_time("String Dedup Fixup", _cur_string_dedup_fixup_time_ms);
debug_phase(_gc_par_phases[StringDedupQueueFixup]);
debug_phase(_gc_par_phases[StringDedupTableFixup]);
}
info_line_and_account("Clear Card Table", _cur_clear_ct_time_ms);
info_line_and_account("Expand Heap After Collection", _cur_expand_heap_time_ms);
info_line_and_account("Free Collection Set", _recorded_total_free_cset_time_ms);
debug_line("Free Collection Set Serial", _recorded_serial_free_cset_time_ms);
debug_phase(_gc_par_phases[YoungFreeCSet]);
debug_phase(_gc_par_phases[NonYoungFreeCSet]);
debug_time("Clear Card Table", _cur_clear_ct_time_ms);
info_line_and_account("Merge Per-Thread State", _recorded_merge_pss_time_ms);
info_line("Other", _gc_pause_time_ms - accounted_time_ms);
if (_cur_verify_before_time_ms > 0.0) {
debug_line("Verify Before", _cur_verify_before_time_ms);
}
if (G1CollectedHeap::heap()->evacuation_failed()) {
double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards +
_cur_evac_fail_restore_remsets;
debug_line("Evacuation Failure", evac_fail_handling);
trace_line("Recalculate Used", _cur_evac_fail_recalc_used);
trace_line("Remove Self Forwards",_cur_evac_fail_remove_self_forwards);
trace_line("Restore RemSet", _cur_evac_fail_restore_remsets);
debug_time("Evacuation Failure", evac_fail_handling);
trace_time("Recalculate Used", _cur_evac_fail_recalc_used);
trace_time("Remove Self Forwards",_cur_evac_fail_remove_self_forwards);
trace_time("Restore RemSet", _cur_evac_fail_restore_remsets);
}
debug_line("Choose CSet", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
debug_line("Preserve CM Refs", _recorded_preserve_cm_referents_time_ms);
trace_phase(_gc_par_phases[PreserveCMReferents]);
debug_line("Reference Processing", _cur_ref_proc_time_ms);
debug_line("Reference Enqueuing", _cur_ref_enq_time_ms);
debug_line("Redirty Cards", _recorded_redirty_logged_cards_time_ms);
debug_time("Reference Enqueuing", _cur_ref_enq_time_ms);
debug_time("Merge Per-Thread State", _recorded_merge_pss_time_ms);
debug_time("Code Roots Purge", _cur_strong_code_root_purge_time_ms);
debug_time("Redirty Cards", _recorded_redirty_logged_cards_time_ms);
if (_recorded_clear_claimed_marks_time_ms > 0.0) {
debug_line("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms);
debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms);
}
trace_phase(_gc_par_phases[RedirtyCards]);
debug_time("Free Collection Set", _recorded_total_free_cset_time_ms);
trace_time("Free Collection Set Serial", _recorded_serial_free_cset_time_ms);
trace_phase(_gc_par_phases[YoungFreeCSet]);
trace_phase(_gc_par_phases[NonYoungFreeCSet]);
if (G1EagerReclaimHumongousObjects) {
debug_line("Humongous Register", _cur_fast_reclaim_humongous_register_time_ms);
trace_line_sz("Humongous Total", _cur_fast_reclaim_humongous_total);
trace_line_sz("Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
debug_line("Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
trace_line_sz("Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
debug_time("Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
trace_count("Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
}
debug_time("Expand Heap After Collection", _cur_expand_heap_time_ms);
return sum_ms;
}
void G1GCPhaseTimes::print_other(double accounted_ms) const {
info_time("Other", _gc_pause_time_ms - accounted_ms);
}
void G1GCPhaseTimes::print() {
note_gc_end();
if (_cur_verify_before_time_ms > 0.0) {
debug_time("Verify Before", _cur_verify_before_time_ms);
}
double accounted_ms = 0.0;
accounted_ms += print_pre_evacuate_collection_set();
accounted_ms += print_evacuate_collection_set();
accounted_ms += print_post_evacuate_collection_set();
print_other(accounted_ms);
if (_cur_verify_after_time_ms > 0.0) {
debug_line("Verify After", _cur_verify_after_time_ms);
debug_time("Verify After", _cur_verify_after_time_ms);
}
}
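The bookkeeping in print() is simple subtraction: each print_* group returns the time it reported, and the remainder of the pause is logged as "Other". A hypothetical Java sketch of that accounting (names and sample numbers are invented):

final class PhaseAccounting {
    // Whatever part of the pause the three printed groups did not cover is "Other".
    static double other(double pauseMs, double preMs, double evacMs, double postMs) {
        return pauseMs - (preMs + evacMs + postMs);
    }

    public static void main(String[] args) {
        System.out.println(other(25.0, 1.5, 18.0, 4.0));   // prints 1.5
    }
}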

View File

@ -25,6 +25,7 @@
#ifndef SHARE_VM_GC_G1_G1GCPHASETIMES_HPP
#define SHARE_VM_GC_G1_G1GCPHASETIMES_HPP
#include "logging/logLevel.hpp"
#include "memory/allocation.hpp"
class LineBuffer;
@ -129,12 +130,24 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double worker_time(GCParPhases phase, uint worker);
void note_gc_end();
void reset();
template <class T>
void details(T* phase, const char* indent);
void log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum);
void debug_phase(WorkerDataArray<double>* phase);
void trace_phase(WorkerDataArray<double>* phase, bool print_sum = true);
void details(T* phase, const char* indent) const;
void log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum) const;
void debug_phase(WorkerDataArray<double>* phase) const;
void trace_phase(WorkerDataArray<double>* phase, bool print_sum = true) const;
void info_time(const char* name, double value) const;
void debug_time(const char* name, double value) const;
void trace_time(const char* name, double value) const;
void trace_count(const char* name, size_t value) const;
double print_pre_evacuate_collection_set() const;
double print_evacuate_collection_set() const;
double print_post_evacuate_collection_set() const;
void print_other(double accounted_ms) const;
public:
G1GCPhaseTimes(uint max_gc_threads);

View File

@ -235,11 +235,12 @@ private:
public:
G1PretouchTask(char* start_address, char* end_address, size_t page_size) :
AbstractGangTask("G1 PreTouch",
Universe::is_fully_initialized() ? GCId::current_raw() :
// During VM initialization there is
// no GC cycle that this task can be
// associated with.
GCId::undefined()),
Universe::is_fully_initialized() &&
Thread::current()->is_Named_thread() ? GCId::current_raw() :
// During VM initialization there is
// no GC cycle that this task can be
// associated with.
GCId::undefined()),
_cur_addr(start_address),
_start_addr(start_address),
_end_addr(end_address),
@ -262,15 +263,20 @@ public:
};
void G1PageBasedVirtualSpace::pretouch(size_t start_page, size_t size_in_pages, WorkGang* pretouch_gang) {
guarantee(pretouch_gang != NULL, "No pretouch gang specified.");
size_t num_chunks = MAX2((size_t)1, size_in_pages * _page_size / MAX2(G1PretouchTask::chunk_size(), _page_size));
uint num_workers = MIN2((uint)num_chunks, pretouch_gang->active_workers());
G1PretouchTask cl(page_start(start_page), bounded_end_addr(start_page + size_in_pages), _page_size);
log_debug(gc, heap)("Running %s with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT "B.",
cl.name(), num_workers, num_chunks, size_in_pages * _page_size);
pretouch_gang->run_task(&cl, num_workers);
if (pretouch_gang != NULL) {
size_t num_chunks = MAX2((size_t)1, size_in_pages * _page_size / MAX2(G1PretouchTask::chunk_size(), _page_size));
uint num_workers = MIN2((uint)num_chunks, pretouch_gang->active_workers());
log_debug(gc, heap)("Running %s with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT "B.",
cl.name(), num_workers, num_chunks, size_in_pages * _page_size);
pretouch_gang->run_task(&cl, num_workers);
} else {
log_debug(gc, heap)("Running %s pre-touching " SIZE_FORMAT "B.",
cl.name(), size_in_pages * _page_size);
cl.work(0);
}
}
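The control flow above is a plain "use the worker gang if we have one, otherwise do the work in the caller" fallback; a minimal, hypothetical Java sketch of that pattern (the chunking and logging are omitted, and the names are made up):

import java.util.concurrent.ExecutorService;

final class PretouchFallback {
    // Run the work on the pool when one is available, otherwise inline in the caller.
    static void run(Runnable work, ExecutorService workers) throws Exception {
        if (workers != null) {
            workers.submit(work).get();   // parallel path; wait for completion
        } else {
            work.run();                   // no gang available: the caller does the work itself
        }
    }
}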
bool G1PageBasedVirtualSpace::contains(const void* p) const {

View File

@ -575,18 +575,26 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
// And find the region containing it.
HeapRegion* r = _g1->heap_region_containing(start);
// Why do we have to check here whether a card is on a young region,
// given that we dirty young regions and, as a result, the
// post-barrier is supposed to filter them out and never to enqueue
// them? When we allocate a new region as the "allocation region" we
// actually dirty its cards after we release the lock, since card
// dirtying while holding the lock was a performance bottleneck. So,
// as a result, it is possible for other threads to actually
// allocate objects in the region (after the acquire the lock)
// before all the cards on the region are dirtied. This is unlikely,
// and it doesn't happen often, but it can happen. So, the extra
// check below filters out those cards.
if (r->is_young()) {
// This check is needed for some uncommon cases where we should
// ignore the card.
//
// The region could be young. Cards for young regions are
// distinctly marked (set to g1_young_gen), so the post-barrier will
// filter them out. However, that marking is performed
// concurrently. A write to a young object could occur before the
// card has been marked young, slipping past the filter.
//
// The card could be stale, because the region has been freed since
// the card was recorded. In this case the region type could be
// anything. If (still) free or (reallocated) young, just ignore
// it. If (reallocated) old or humongous, the later card trimming
// and additional checks in iteration may detect staleness. At
// worst, we end up processing a stale card unnecessarily.
//
// In the normal (non-stale) case, the synchronization between the
// enqueueing of the card and processing it here will have ensured
// we see the up-to-date region type here.
if (!r->is_old_or_humongous()) {
return false;
}
@ -617,26 +625,69 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
assert(!check_for_refs_into_cset, "sanity");
assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
const jbyte* orig_card_ptr = card_ptr;
card_ptr = _hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return false;
}
} else if (card_ptr != orig_card_ptr) {
// Original card was inserted and an old card was evicted.
start = _ct_bs->addr_for(card_ptr);
r = _g1->heap_region_containing(start);
start = _ct_bs->addr_for(card_ptr);
r = _g1->heap_region_containing(start);
// Checking whether the region we got back from the cache
// is young here is inappropriate. The region could have been
// freed, reallocated and tagged as young while in the cache.
// Hence we could see its young type change at any time.
// Check whether the region formerly in the cache should be
// ignored, as discussed earlier for the original card. The
// region could have been freed while in the cache. The cset is
// not relevant here, since we're in concurrent phase.
if (!r->is_old_or_humongous()) {
return false;
}
} // Else we still have the original card.
}
// Trim the region designated by the card to what's been allocated
// in the region. The card could be stale, or the card could cover
// (part of) an object at the end of the allocated space and extend
// beyond the end of allocation.
HeapWord* scan_limit;
if (_g1->is_gc_active()) {
// If we're in a STW GC, then a card might be in a GC alloc region
// and extend onto a GC LAB, which may not be parsable. Stop such
// at the "scan_top" of the region.
scan_limit = r->scan_top();
} else {
// Non-humongous objects are only allocated in the old-gen during
// GC, so if region is old then top is stable. Humongous object
// allocation sets top last; if top has not yet been set, this is
// a stale card and we'll end up with an empty intersection. If
// this is not a stale card, the synchronization between the
// enqueuing of the card and processing it here will have ensured
// we see the up-to-date top here.
scan_limit = r->top();
}
if (scan_limit <= start) {
// If the trimmed region is empty, the card must be stale.
return false;
}
// Okay to clean and process the card now. There are still some
// stale card cases that may be detected by iteration and dealt with
// as iteration failure.
*const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
// This fence serves two purposes. First, the card must be cleaned
// before processing the contents. Second, we can't proceed with
// processing until after the read of top, for synchronization with
// possibly concurrent humongous object allocation. It's okay that
// reading top and reading type were racy with respect to each other. We need
// both set, in any order, to proceed.
OrderAccess::fence();
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap. This is not safe without a perm
// gen at the upper end of the heap.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
MemRegion dirtyRegion(start, end);
// a card beyond the heap.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
MemRegion dirty_region(start, MIN2(scan_limit, end));
assert(!dirty_region.is_empty(), "sanity");
G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
_g1->g1_rem_set(),
@ -655,29 +706,15 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
(OopClosure*)&mux :
(OopClosure*)&update_rs_oop_cl));
// The region for the current card may be a young region. The
// current card may have been a card that was evicted from the
// card cache. When the card was inserted into the cache, we had
// determined that its region was non-young. While in the cache,
// the region may have been freed during a cleanup pause, reallocated
// and tagged as young.
//
// We wish to filter out cards for such a region but the current
// thread, if we're running concurrently, may "see" the young type
// change at any time (so an earlier "is_young" check may pass or
// fail arbitrarily). We tell the iteration code to perform this
// filtering when it has been determined that there has been an actual
// allocation in this region and making it safe to check the young type.
bool card_processed =
r->oops_on_card_seq_iterate_careful(dirtyRegion,
&filter_then_update_rs_oop_cl,
card_ptr);
r->oops_on_card_seq_iterate_careful(dirty_region,
&filter_then_update_rs_oop_cl);
// If unable to process the card then we encountered an unparsable
// part of the heap (e.g. a partially allocated object). Redirty
// and re-enqueue: if we put off the card until a GC pause, then the
// allocation will have completed.
// part of the heap (e.g. a partially allocated object) while
// processing a stale card. Despite the card being stale, redirty
// and re-enqueue, because we've already cleaned the card. Without
// this we could incorrectly discard a non-stale card.
if (!card_processed) {
assert(!_g1->is_gc_active(), "Unparsable heap during GC");
// The card might have gotten re-dirtied and re-enqueued while we
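The control flow above (filter non-old regions, trim the card to the allocated prefix, clean the card, fence, then scan) can be modeled with a small standalone sketch. Everything below, the ToyRegion type, the card values and CARD_BYTES, is an illustrative assumption rather than HotSpot's API; the real code also re-dirties and re-enqueues the card when the scan hits an unparsable object, as the comments above describe.

#include <algorithm>
#include <atomic>
#include <cstddef>

typedef signed char jbyte;                     // same width as HotSpot's card entries
const jbyte clean_card = -1;                   // illustrative value only
const size_t CARD_BYTES = 512;                 // assumed card coverage for the sketch

struct ToyRegion {                             // hypothetical stand-in for HeapRegion
  char* bottom;
  std::atomic<char*> top;                      // published by the allocating thread
  bool old_or_humongous;
};

// Returns false when the card is filtered out or stale, true when the trimmed
// range [start, limit) was handed to the object scanner.
bool refine_card_sketch(volatile jbyte* card, char* start, ToyRegion* r) {
  if (!r->old_or_humongous) return false;      // young or free region: ignore the card
  char* scan_limit = r->top.load(std::memory_order_acquire);
  if (scan_limit <= start) return false;       // empty intersection, so the card is stale
  *card = clean_card;                          // clean before examining the contents
  std::atomic_thread_fence(std::memory_order_seq_cst);  // order the clean and the top read vs. the scan
  char* limit = std::min(scan_limit, start + CARD_BYTES);
  // ... iterate the objects overlapping [start, limit) here ...
  (void)limit;
  return true;
}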

View File

@ -178,44 +178,37 @@ G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
}
void
G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr) {
volatile jbyte* byte = byte_for(mr.start());
jbyte* last_byte = byte_for(mr.last());
Thread* thr = Thread::current();
if (whole_heap) {
while (byte <= last_byte) {
*byte = dirty_card;
byte++;
}
} else {
// skip all consecutive young cards
for (; byte <= last_byte && *byte == g1_young_gen; byte++);
for (; byte <= last_byte && *byte == g1_young_gen; byte++);
if (byte <= last_byte) {
OrderAccess::storeload();
// Enqueue if necessary.
if (thr->is_Java_thread()) {
JavaThread* jt = (JavaThread*)thr;
for (; byte <= last_byte; byte++) {
if (*byte == g1_young_gen) {
continue;
}
if (*byte != dirty_card) {
*byte = dirty_card;
jt->dirty_card_queue().enqueue(byte);
}
if (byte <= last_byte) {
OrderAccess::storeload();
// Enqueue if necessary.
if (thr->is_Java_thread()) {
JavaThread* jt = (JavaThread*)thr;
for (; byte <= last_byte; byte++) {
if (*byte == g1_young_gen) {
continue;
}
} else {
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
for (; byte <= last_byte; byte++) {
if (*byte == g1_young_gen) {
continue;
}
if (*byte != dirty_card) {
*byte = dirty_card;
_dcqs.shared_dirty_card_queue()->enqueue(byte);
}
if (*byte != dirty_card) {
*byte = dirty_card;
jt->dirty_card_queue().enqueue(byte);
}
}
} else {
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
for (; byte <= last_byte; byte++) {
if (*byte == g1_young_gen) {
continue;
}
if (*byte != dirty_card) {
*byte = dirty_card;
_dcqs.shared_dirty_card_queue()->enqueue(byte);
}
}
}
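Stripped of the Java-thread versus shared-queue split, the loop above reduces to: skip the leading run of young cards, issue a storeload fence, then dirty and enqueue every remaining card that is neither young nor already dirty. The sketch below models that on a toy card array; the card values and the vector standing in for the dirty card queue are assumptions, not the real DirtyCardQueue types.

#include <atomic>
#include <vector>

typedef signed char jbyte;
const jbyte dirty_card = 0;                    // illustrative values only
const jbyte g1_young_gen = 16;

void invalidate_sketch(volatile jbyte* byte, volatile jbyte* last_byte,
                       std::vector<volatile jbyte*>& queue) {
  // Skip all consecutive young cards; the write barrier filters young regions anyway.
  for (; byte <= last_byte && *byte == g1_young_gen; byte++) {}
  if (byte > last_byte) return;
  std::atomic_thread_fence(std::memory_order_seq_cst);   // the storeload() above
  for (; byte <= last_byte; byte++) {
    if (*byte == g1_young_gen) continue;                  // never enqueue young cards
    if (*byte != dirty_card) {
      *byte = dirty_card;
      queue.push_back(byte);                              // stand-in for enqueue(byte)
    }
  }
}

The real code additionally picks between a per-JavaThread queue and the shared queue under Shared_DirtyCardQ_lock, which the sketch leaves out.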

View File

@ -152,7 +152,7 @@ class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS {
// NB: if you do a whole-heap invalidation, the "usual invariant" defined
// above no longer applies.
void invalidate(MemRegion mr, bool whole_heap = false);
void invalidate(MemRegion mr);
void write_region_work(MemRegion mr) { invalidate(mr); }
void write_ref_array_work(MemRegion mr) { invalidate(mr); }

View File

@ -352,89 +352,101 @@ void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
_prev_marked_bytes = marked_bytes;
}
// Humongous objects are allocated directly in the old-gen. Need
// special handling for concurrent processing encountering an
// in-progress allocation.
static bool do_oops_on_card_in_humongous(MemRegion mr,
FilterOutOfRegionClosure* cl,
HeapRegion* hr,
G1CollectedHeap* g1h) {
assert(hr->is_humongous(), "precondition");
HeapRegion* sr = hr->humongous_start_region();
oop obj = oop(sr->bottom());
// If concurrent and klass_or_null is NULL, then space has been
// allocated but the object has not yet been published by setting
// the klass. That can only happen if the card is stale. However,
// we've already set the card clean, so we must return failure,
// since the allocating thread could have performed a write to the
// card that might be missed otherwise.
if (!g1h->is_gc_active() && (obj->klass_or_null_acquire() == NULL)) {
return false;
}
// We have a well-formed humongous object at the start of sr.
// Only filler objects follow a humongous object in the containing
// regions, and we can ignore those. So only process the one
// humongous object.
if (!g1h->is_obj_dead(obj, sr)) {
if (obj->is_objArray() || (sr->bottom() < mr.start())) {
// objArrays are always marked precisely, so limit processing
// with mr. Non-objArrays might be precisely marked, and since
// it's humongous it's worthwhile avoiding full processing.
// However, the card could be stale and only cover filler
// objects. That should be rare, so not worth checking for;
// instead let it fall out from the bounded iteration.
obj->oop_iterate(cl, mr);
} else {
// If obj is not an objArray and mr contains the start of the
// obj, then this could be an imprecise mark, and we need to
// process the entire object.
obj->oop_iterate(cl);
}
}
return true;
}
bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
jbyte* card_ptr) {
assert(card_ptr != NULL, "pre-condition");
FilterOutOfRegionClosure* cl) {
assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region");
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// If we're within a stop-world GC, then we might look at a card in a
// GC alloc region that extends onto a GC LAB, which may not be
// parseable. Stop such at the "scan_top" of the region.
if (g1h->is_gc_active()) {
mr = mr.intersection(MemRegion(bottom(), scan_top()));
} else {
mr = mr.intersection(used_region());
// Special handling for humongous regions.
if (is_humongous()) {
return do_oops_on_card_in_humongous(mr, cl, this, g1h);
}
if (mr.is_empty()) {
return true;
}
// Otherwise, find the obj that extends onto mr.start().
assert(is_old(), "precondition");
// The intersection of the incoming mr (for the card) and the
// allocated part of the region is non-empty. This implies that
// we have actually allocated into this region. The code in
// G1CollectedHeap.cpp that allocates a new region sets the
// is_young tag on the region before allocating. Thus we
// safely know if this region is young.
if (is_young()) {
return true;
}
// We can only clean the card here, after we make the decision that
// the card is not young.
*card_ptr = CardTableModRefBS::clean_card_val();
// We must complete this write before we do any of the reads below.
OrderAccess::storeload();
// Because mr has been trimmed to what's been allocated in this
// region, the parts of the heap that are examined here are always
// parsable; there's no need to use klass_or_null to detect
// in-progress allocation.
// Cache the boundaries of the memory region in some const locals
HeapWord* const start = mr.start();
HeapWord* const end = mr.end();
// Update BOT as needed while finding start of (potential) object.
// Find the obj that extends onto mr.start().
// Update BOT as needed while finding start of (possibly dead)
// object containing the start of the region.
HeapWord* cur = block_start(start);
assert(cur <= start, "Postcondition");
oop obj;
HeapWord* next = cur;
do {
cur = next;
obj = oop(cur);
if (obj->klass_or_null() == NULL) {
// Ran into an unparseable point.
assert(!g1h->is_gc_active(),
"Unparsable heap during GC at " PTR_FORMAT, p2i(cur));
return false;
}
// Otherwise...
next = cur + block_size(cur);
} while (next <= start);
// If we finish the above loop...We have a parseable object that
// begins on or before the start of the memory region, and ends
// inside or spans the entire region.
assert(cur <= start, "Loop postcondition");
assert(obj->klass_or_null() != NULL, "Loop postcondition");
#ifdef ASSERT
{
assert(cur <= start,
"cur: " PTR_FORMAT ", start: " PTR_FORMAT, p2i(cur), p2i(start));
HeapWord* next = cur + block_size(cur);
assert(start < next,
"start: " PTR_FORMAT ", next: " PTR_FORMAT, p2i(start), p2i(next));
}
#endif
do {
obj = oop(cur);
assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant");
if (obj->klass_or_null() == NULL) {
// Ran into an unparseable point.
assert(!g1h->is_gc_active(),
"Unparsable heap during GC at " PTR_FORMAT, p2i(cur));
return false;
}
oop obj = oop(cur);
assert(obj->is_oop(true), "Not an oop at " PTR_FORMAT, p2i(cur));
assert(obj->klass_or_null() != NULL,
"Unparsable heap at " PTR_FORMAT, p2i(cur));
// Advance the current pointer. "obj" still points to the object to iterate.
cur = cur + block_size(cur);
if (!g1h->is_obj_dead(obj)) {
// Non-objArrays are sometimes marked imprecise at the object start. We
// always need to iterate over them in full.
// We only iterate over object arrays in full if they are completely contained
// in the memory region.
if (g1h->is_obj_dead(obj, this)) {
// Carefully step over dead object.
cur += block_size(cur);
} else {
// Step over live object, and process its references.
cur += obj->size();
// Non-objArrays are usually marked imprecise at the object
// start, in which case we need to iterate over them in full.
// objArrays are precisely marked, but can still be iterated
// over in full if completely covered.
if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) {
obj->oop_iterate(cl);
} else {
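The walk above (back up with block_start() to the object at or before mr.start(), then advance by block_size() and apply the closure to each live object until the card's end is passed) can be illustrated on a toy heap. The ToyObj layout and the visitor are assumptions of the sketch; there is no block offset table, and liveness is just a flag.

#include <cstddef>
#include <vector>

// Toy heap: objects laid out back to back, described by start offset and size.
struct ToyObj { size_t start; size_t size; bool dead; };

// Visit every live object overlapping [mr_start, mr_end), mimicking the
// block_start()/block_size() walk over a parsable old region.
template <typename Visitor>
void oops_on_card_sketch(const std::vector<ToyObj>& heap,
                         size_t mr_start, size_t mr_end, Visitor visit) {
  if (heap.empty() || mr_start >= mr_end) return;
  // block_start(): back up to the object containing (or preceding) mr_start.
  size_t i = 0;
  while (i + 1 < heap.size() && heap[i].start + heap[i].size <= mr_start) {
    i++;                                        // step forward by block_size()
  }
  // Walk forward until the current object starts at or beyond the card's end.
  for (; i < heap.size() && heap[i].start < mr_end; i++) {
    if (!heap[i].dead) {
      visit(heap[i]);                           // stand-in for obj->oop_iterate(cl)
    }
  }
}

A call such as oops_on_card_sketch(heap, 512, 1024, [](const ToyObj&) {}); then touches exactly the objects whose ranges intersect that card.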

View File

@ -51,8 +51,9 @@
// object is larger than a heap region, the following regions will
// be of type ContinuesHumongous. In this case the top() of the
// StartHumongous region and all ContinuesHumongous regions except
// the last will point to their own end. For the last ContinuesHumongous
// region, top() will equal the object's top.
// the last will point to their own end. The last ContinuesHumongous
// region may have top() equal the end of object if there isn't
// room for filler objects to pad out to the end of the region.
class G1CollectedHeap;
class HeapRegionRemSet;
@ -433,6 +434,8 @@ class HeapRegion: public G1ContiguousSpace {
bool is_old() const { return _type.is_old(); }
bool is_old_or_humongous() const { return _type.is_old_or_humongous(); }
// A pinned region contains objects which are not moved by garbage collections.
// Humongous regions and archive regions are pinned.
bool is_pinned() const { return _type.is_pinned(); }
@ -653,17 +656,18 @@ class HeapRegion: public G1ContiguousSpace {
}
}
// Iterate over the card in the card designated by card_ptr,
// applying cl to all references in the region.
// mr: the memory region covered by the card.
// card_ptr: if we decide that the card is not young and we iterate
// over it, we'll clean the card before we start the iteration.
// Returns true if card was successfully processed, false if an
// unparsable part of the heap was encountered, which should only
// happen when invoked concurrently with the mutator.
// Iterate over the objects overlapping part of a card, applying cl
// to all references in the region. This is a helper for
// G1RemSet::refine_card, and is tightly coupled with it.
// mr: the memory region covered by the card, trimmed to the
// allocated space for this region. Must not be empty.
// This region must be old or humongous.
// Returns true if the designated objects were successfully
// processed, false if an unparsable part of the heap was
// encountered; that only happens when invoked concurrently with the
// mutator.
bool oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
jbyte* card_ptr);
FilterOutOfRegionClosure* cl);
size_t recorded_rs_length() const { return _recorded_rs_length; }
double predicted_elapsed_time_ms() const { return _predicted_elapsed_time_ms; }

View File

@ -286,7 +286,7 @@ uint HeapRegionManager::find_highest_free(bool* expanded) {
while (true) {
HeapRegion *hr = _regions.get_by_index(curr);
if (hr == NULL) {
uint res = expand_at(curr, 1);
uint res = expand_at(curr, 1, NULL);
if (res == 1) {
*expanded = true;
return curr;
@ -304,7 +304,7 @@ uint HeapRegionManager::find_highest_free(bool* expanded) {
}
}
bool HeapRegionManager::allocate_containing_regions(MemRegion range, size_t* commit_count) {
bool HeapRegionManager::allocate_containing_regions(MemRegion range, size_t* commit_count, WorkGang* pretouch_workers) {
size_t commits = 0;
uint start_index = (uint)_regions.get_index_by_address(range.start());
uint last_index = (uint)_regions.get_index_by_address(range.last());
@ -314,7 +314,7 @@ bool HeapRegionManager::allocate_containing_regions(MemRegion range, size_t* com
for (uint curr_index = start_index; curr_index <= last_index; curr_index++) {
if (!is_available(curr_index)) {
commits++;
expand_at(curr_index, 1);
expand_at(curr_index, 1, pretouch_workers);
}
HeapRegion* curr_region = _regions.get_by_index(curr_index);
if (!curr_region->is_free()) {

View File

@ -210,12 +210,12 @@ public:
// HeapRegions, or re-use existing ones. Returns the number of regions the
// sequence was expanded by. If a HeapRegion allocation fails, the resulting
// number of regions might be smaller than what's desired.
uint expand_by(uint num_regions, WorkGang* pretouch_workers = NULL);
uint expand_by(uint num_regions, WorkGang* pretouch_workers);
// Makes sure that the regions from start to start+num_regions-1 are available
// for allocation. Returns the number of regions that were committed to achieve
// this.
uint expand_at(uint start, uint num_regions, WorkGang* pretouch_workers = NULL);
uint expand_at(uint start, uint num_regions, WorkGang* pretouch_workers);
// Find a contiguous set of empty regions of length num. Returns the start index of
// that set, or G1_NO_HRM_INDEX.
@ -234,7 +234,7 @@ public:
// Allocate the regions that contain the address range specified, committing the
// regions if necessary. Return false if any of the regions is already committed
// and not free, and return the number of regions newly committed in commit_count.
bool allocate_containing_regions(MemRegion range, size_t* commit_count);
bool allocate_containing_regions(MemRegion range, size_t* commit_count, WorkGang* pretouch_workers);
// Apply blk->doHeapRegion() on all committed regions in address order,
// terminating the iteration early if doHeapRegion() returns true.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -120,6 +120,8 @@ public:
// is_old regions may or may not also be pinned
bool is_old() const { return (get() & OldMask) != 0; }
bool is_old_or_humongous() const { return (get() & (OldMask | HumongousMask)) != 0; }
// is_pinned regions may be archive or humongous
bool is_pinned() const { return (get() & PinnedMask) != 0; }
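The new is_old_or_humongous() accessor is just a union of tag bits; a minimal standalone version of that pattern, with made-up mask values rather than HeapRegionType's real encoding, looks like this.

#include <cstdint>

// Hypothetical tag bits, only to illustrate the mask tests used above.
enum : uint32_t {
  FreeMask      = 1u << 0,
  YoungMask     = 1u << 1,
  HumongousMask = 1u << 2,
  OldMask       = 1u << 3,
  ArchiveMask   = 1u << 4,
  PinnedMask    = HumongousMask | ArchiveMask   // pinned = humongous or archive
};

struct ToyRegionType {
  uint32_t tag;
  bool is_old() const              { return (tag & OldMask) != 0; }
  bool is_old_or_humongous() const { return (tag & (OldMask | HumongousMask)) != 0; }
  bool is_pinned() const           { return (tag & PinnedMask) != 0; }
};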

View File

@ -380,7 +380,7 @@ void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
}
}
void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) {
void CardTableModRefBS::invalidate(MemRegion mr) {
assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
assert((HeapWord*)align_size_up ((uintptr_t)mr.end(), HeapWordSize) == mr.end(), "Unaligned end" );
for (int i = 0; i < _cur_covered_regions; i++) {

View File

@ -260,7 +260,7 @@ public:
}
// ModRefBS functions.
virtual void invalidate(MemRegion mr, bool whole_heap = false);
virtual void invalidate(MemRegion mr);
void clear(MemRegion mr);
void dirty(MemRegion mr);

View File

@ -159,8 +159,8 @@ public:
void clear(MemRegion mr) { _ct_bs->clear(mr); }
void clear_into_younger(Generation* old_gen);
void invalidate(MemRegion mr, bool whole_heap = false) {
_ct_bs->invalidate(mr, whole_heap);
void invalidate(MemRegion mr) {
_ct_bs->invalidate(mr);
}
void invalidate_or_clear(Generation* old_gen);

View File

@ -601,34 +601,3 @@ void CollectedHeap::initialize_reserved_region(HeapWord *start, HeapWord *end) {
_reserved.set_start(start);
_reserved.set_end(end);
}
/////////////// Unit tests ///////////////
#ifndef PRODUCT
void CollectedHeap::test_is_in() {
CollectedHeap* heap = Universe::heap();
uintptr_t epsilon = (uintptr_t) MinObjAlignment;
uintptr_t heap_start = (uintptr_t) heap->_reserved.start();
uintptr_t heap_end = (uintptr_t) heap->_reserved.end();
// Test that NULL is not in the heap.
assert(!heap->is_in(NULL), "NULL is unexpectedly in the heap");
// Test that a pointer to before the heap start is reported as outside the heap.
assert(heap_start >= ((uintptr_t)NULL + epsilon), "sanity");
void* before_heap = (void*)(heap_start - epsilon);
assert(!heap->is_in(before_heap),
"before_heap: " PTR_FORMAT " is unexpectedly in the heap", p2i(before_heap));
// Test that a pointer to after the heap end is reported as outside the heap.
assert(heap_end <= ((uintptr_t)-1 - epsilon), "sanity");
void* after_heap = (void*)(heap_end + epsilon);
assert(!heap->is_in(after_heap),
"after_heap: " PTR_FORMAT " is unexpectedly in the heap", p2i(after_heap));
}
void CollectedHeap_test() {
CollectedHeap::test_is_in();
}
#endif

View File

@ -612,9 +612,6 @@ class CollectedHeap : public CHeapObj<mtInternal> {
return false;
}
/////////////// Unit tests ///////////////
NOT_PRODUCT(static void test_is_in();)
};
// Class to set and reset the GC cause for a CollectedHeap.

View File

@ -86,10 +86,8 @@ public:
assert(false, "can't call");
}
// Causes all refs in "mr" to be assumed to be modified. If "whole_heap"
// is true, the caller asserts that the entire heap is being invalidated,
// which may admit an optimized implementation for some barriers.
virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0;
// Causes all refs in "mr" to be assumed to be modified.
virtual void invalidate(MemRegion mr) = 0;
// The caller guarantees that "mr" contains no references. (Perhaps its
// objects have been moved elsewhere.)

View File

@ -124,29 +124,19 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
}
#ifndef CC_INTERP
if (UseCRC32Intrinsics && m->is_native()) {
switch (m->intrinsic_id()) {
// Use optimized stub code for CRC32 native methods.
switch (m->intrinsic_id()) {
case vmIntrinsics::_updateCRC32 : return java_util_zip_CRC32_update;
case vmIntrinsics::_updateBytesCRC32 : return java_util_zip_CRC32_updateBytes;
case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
}
}
if (UseCRC32CIntrinsics) {
case vmIntrinsics::_updateCRC32 : return java_util_zip_CRC32_update;
case vmIntrinsics::_updateBytesCRC32 : return java_util_zip_CRC32_updateBytes;
case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
// Use optimized stub code for CRC32C methods.
switch (m->intrinsic_id()) {
case vmIntrinsics::_updateBytesCRC32C : return java_util_zip_CRC32C_updateBytes;
case vmIntrinsics::_updateDirectByteBufferCRC32C : return java_util_zip_CRC32C_updateDirectByteBuffer;
}
case vmIntrinsics::_updateBytesCRC32C : return java_util_zip_CRC32C_updateBytes;
case vmIntrinsics::_updateDirectByteBufferCRC32C : return java_util_zip_CRC32C_updateDirectByteBuffer;
case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat;
case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits;
case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble;
case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits;
}
switch(m->intrinsic_id()) {
case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat;
case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits;
case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble;
case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits;
}
#endif // CC_INTERP
// Native method?
@ -189,18 +179,13 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
case vmIntrinsics::_dlog10: return java_lang_math_log10;
case vmIntrinsics::_dpow : return java_lang_math_pow ;
case vmIntrinsics::_dexp : return java_lang_math_exp ;
case vmIntrinsics::_fmaD : return java_lang_math_fmaD ;
case vmIntrinsics::_fmaF : return java_lang_math_fmaF ;
case vmIntrinsics::_Reference_get:
return java_lang_ref_reference_get;
}
if (UseFMA) {
switch (m->intrinsic_id()) {
case vmIntrinsics::_fmaD: return java_lang_math_fmaD;
case vmIntrinsics::_fmaF: return java_lang_math_fmaF;
}
}
// Accessor method?
if (m->is_getter()) {
// TODO: We should have used ::is_accessor above, but fast accessors in Zero expect only getters.

View File

@ -239,10 +239,8 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(java_lang_math_log10)
method_entry(java_lang_math_exp )
method_entry(java_lang_math_pow )
if (UseFMA) {
method_entry(java_lang_math_fmaF)
method_entry(java_lang_math_fmaD)
}
method_entry(java_lang_math_fmaF )
method_entry(java_lang_math_fmaD )
method_entry(java_lang_ref_reference_get)
AbstractInterpreter::initialize_method_handle_entries();
@ -253,16 +251,11 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(native_synchronized)
Interpreter::_native_entry_end = Interpreter::code()->code_end();
if (UseCRC32Intrinsics) {
method_entry(java_util_zip_CRC32_update)
method_entry(java_util_zip_CRC32_updateBytes)
method_entry(java_util_zip_CRC32_updateByteBuffer)
}
if (UseCRC32CIntrinsics) {
method_entry(java_util_zip_CRC32C_updateBytes)
method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
}
method_entry(java_util_zip_CRC32_update)
method_entry(java_util_zip_CRC32_updateBytes)
method_entry(java_util_zip_CRC32_updateByteBuffer)
method_entry(java_util_zip_CRC32C_updateBytes)
method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
method_entry(java_lang_Float_intBitsToFloat);
method_entry(java_lang_Float_floatToRawIntBits);
@ -451,7 +444,7 @@ address TemplateInterpreterGenerator::generate_method_entry(
case Interpreter::java_lang_math_pow : // fall thru
case Interpreter::java_lang_math_exp : // fall thru
case Interpreter::java_lang_math_fmaD : // fall thru
case Interpreter::java_lang_math_fmaF : entry_point = generate_math_entry(kind); break;
case Interpreter::java_lang_math_fmaF : entry_point = generate_math_entry(kind); break;
case Interpreter::java_lang_ref_reference_get
: entry_point = generate_Reference_get_entry(); break;
case Interpreter::java_util_zip_CRC32_update

View File

@ -280,8 +280,25 @@
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
static_field(StubRoutines, _counterMode_AESCrypt, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \
static_field(StubRoutines, _sha1_implCompress, address) \
static_field(StubRoutines, _sha1_implCompressMB, address) \
static_field(StubRoutines, _sha256_implCompress, address) \
static_field(StubRoutines, _sha256_implCompressMB, address) \
static_field(StubRoutines, _sha512_implCompress, address) \
static_field(StubRoutines, _sha512_implCompressMB, address) \
static_field(StubRoutines, _updateBytesCRC32, address) \
static_field(StubRoutines, _crc_table_adr, address) \
static_field(StubRoutines, _crc32c_table_addr, address) \
static_field(StubRoutines, _updateBytesCRC32C, address) \
static_field(StubRoutines, _updateBytesAdler32, address) \
static_field(StubRoutines, _multiplyToLen, address) \
static_field(StubRoutines, _squareToLen, address) \
static_field(StubRoutines, _mulAdd, address) \
static_field(StubRoutines, _montgomeryMultiply, address) \
static_field(StubRoutines, _montgomerySquare, address) \
static_field(StubRoutines, _vectorizedMismatch, address) \
\
nonstatic_field(Thread, _tlab, ThreadLocalAllocBuffer) \
nonstatic_field(Thread, _allocated_bytes, jlong) \

View File

@ -153,7 +153,7 @@ class ChunkManager : public CHeapObj<mtInternal> {
// Map a size to a list index assuming that there are lists
// for special, small, medium, and humongous chunks.
static ChunkIndex list_index(size_t size);
ChunkIndex list_index(size_t size);
// Remove the chunk from its freelist. It is
// expected to be on one of the _free_chunks[] lists.
@ -489,6 +489,10 @@ VirtualSpaceNode::VirtualSpaceNode(size_t bytes) : _top(NULL), _next(NULL), _rs(
// Get a mmap region anywhere if the SharedBaseAddress fails.
_rs = ReservedSpace(bytes, Metaspace::reserve_alignment(), large_pages);
}
if (!_rs.is_reserved()) {
vm_exit_during_initialization("Unable to allocate memory for shared space",
err_msg(SIZE_FORMAT " bytes.", bytes));
}
MetaspaceShared::initialize_shared_rs(&_rs);
} else
#endif
@ -592,9 +596,8 @@ class VirtualSpaceList : public CHeapObj<mtClass> {
size_t free_bytes();
Metachunk* get_new_chunk(size_t word_size,
size_t grow_chunks_by_words,
size_t medium_chunk_bunch);
Metachunk* get_new_chunk(size_t chunk_word_size,
size_t suggested_commit_granularity);
bool expand_node_by(VirtualSpaceNode* node,
size_t min_words,
@ -745,15 +748,22 @@ class SpaceManager : public CHeapObj<mtClass> {
MediumChunkMultiple = 4
};
bool is_class() { return _mdtype == Metaspace::ClassType; }
static size_t specialized_chunk_size(bool is_class) { return is_class ? ClassSpecializedChunk : SpecializedChunk; }
static size_t small_chunk_size(bool is_class) { return is_class ? ClassSmallChunk : SmallChunk; }
static size_t medium_chunk_size(bool is_class) { return is_class ? ClassMediumChunk : MediumChunk; }
static size_t smallest_chunk_size(bool is_class) { return specialized_chunk_size(is_class); }
// Accessors
size_t specialized_chunk_size() { return (size_t) is_class() ? ClassSpecializedChunk : SpecializedChunk; }
size_t small_chunk_size() { return (size_t) is_class() ? ClassSmallChunk : SmallChunk; }
size_t medium_chunk_size() { return (size_t) is_class() ? ClassMediumChunk : MediumChunk; }
size_t medium_chunk_bunch() { return medium_chunk_size() * MediumChunkMultiple; }
bool is_class() const { return _mdtype == Metaspace::ClassType; }
size_t smallest_chunk_size() { return specialized_chunk_size(); }
size_t specialized_chunk_size() const { return specialized_chunk_size(is_class()); }
size_t small_chunk_size() const { return small_chunk_size(is_class()); }
size_t medium_chunk_size() const { return medium_chunk_size(is_class()); }
size_t smallest_chunk_size() const { return smallest_chunk_size(is_class()); }
size_t medium_chunk_bunch() const { return medium_chunk_size() * MediumChunkMultiple; }
size_t allocated_blocks_words() const { return _allocated_blocks_words; }
size_t allocated_blocks_bytes() const { return _allocated_blocks_words * BytesPerWord; }
@ -777,10 +787,13 @@ class SpaceManager : public CHeapObj<mtClass> {
// decremented for all the Metachunks in-use by this SpaceManager.
void dec_total_from_size_metrics();
// Set the sizes for the initial chunks.
void get_initial_chunk_sizes(Metaspace::MetaspaceType type,
size_t* chunk_word_size,
size_t* class_chunk_word_size);
// Adjust the initial chunk size to match one of the fixed chunk list sizes,
// or return the unadjusted size if the requested size is humongous.
static size_t adjust_initial_chunk_size(size_t requested, bool is_class_space);
size_t adjust_initial_chunk_size(size_t requested) const;
// Get the initial chunks size for this metaspace type.
size_t get_initial_chunk_size(Metaspace::MetaspaceType type) const;
size_t sum_capacity_in_chunks_in_use() const;
size_t sum_used_in_chunks_in_use() const;
@ -791,7 +804,7 @@ class SpaceManager : public CHeapObj<mtClass> {
size_t sum_count_in_chunks_in_use();
size_t sum_count_in_chunks_in_use(ChunkIndex i);
Metachunk* get_new_chunk(size_t word_size, size_t grow_chunks_by_words);
Metachunk* get_new_chunk(size_t chunk_word_size);
// Block allocation and deallocation.
// Allocates a block from the current chunk
@ -1396,12 +1409,10 @@ bool VirtualSpaceList::expand_by(size_t min_words, size_t preferred_words) {
return false;
}
Metachunk* VirtualSpaceList::get_new_chunk(size_t word_size,
size_t grow_chunks_by_words,
size_t medium_chunk_bunch) {
Metachunk* VirtualSpaceList::get_new_chunk(size_t chunk_word_size, size_t suggested_commit_granularity) {
// Allocate a chunk out of the current virtual space.
Metachunk* next = current_virtual_space()->get_chunk_vs(grow_chunks_by_words);
Metachunk* next = current_virtual_space()->get_chunk_vs(chunk_word_size);
if (next != NULL) {
return next;
@ -1410,8 +1421,8 @@ Metachunk* VirtualSpaceList::get_new_chunk(size_t word_size,
// The expand amount is currently only determined by the requested sizes
// and not how much committed memory is left in the current virtual space.
size_t min_word_size = align_size_up(grow_chunks_by_words, Metaspace::commit_alignment_words());
size_t preferred_word_size = align_size_up(medium_chunk_bunch, Metaspace::commit_alignment_words());
size_t min_word_size = align_size_up(chunk_word_size, Metaspace::commit_alignment_words());
size_t preferred_word_size = align_size_up(suggested_commit_granularity, Metaspace::commit_alignment_words());
if (min_word_size >= preferred_word_size) {
// Can happen when humongous chunks are allocated.
preferred_word_size = min_word_size;
@ -1419,7 +1430,7 @@ Metachunk* VirtualSpaceList::get_new_chunk(size_t word_size,
bool expanded = expand_by(min_word_size, preferred_word_size);
if (expanded) {
next = current_virtual_space()->get_chunk_vs(grow_chunks_by_words);
next = current_virtual_space()->get_chunk_vs(chunk_word_size);
assert(next != NULL, "The allocation was expected to succeed after the expansion");
}
@ -1783,7 +1794,11 @@ void ChunkManager::locked_print_sum_free_chunks(outputStream* st) {
st->print_cr("Sum free chunk total " SIZE_FORMAT " count " SIZE_FORMAT,
sum_free_chunks(), sum_free_chunks_count());
}
ChunkList* ChunkManager::free_chunks(ChunkIndex index) {
assert(index == SpecializedIndex || index == SmallIndex || index == MediumIndex,
"Bad index: %d", (int)index);
return &_free_chunks[index];
}
@ -1887,7 +1902,7 @@ Metachunk* ChunkManager::chunk_freelist_allocate(size_t word_size) {
}
assert((word_size <= chunk->word_size()) ||
list_index(chunk->word_size() == HumongousIndex),
(list_index(chunk->word_size()) == HumongousIndex),
"Non-humongous variable sized chunk");
Log(gc, metaspace, freelist) log;
if (log.is_debug()) {
@ -1913,36 +1928,58 @@ void ChunkManager::print_on(outputStream* out) const {
// SpaceManager methods
void SpaceManager::get_initial_chunk_sizes(Metaspace::MetaspaceType type,
size_t* chunk_word_size,
size_t* class_chunk_word_size) {
switch (type) {
case Metaspace::BootMetaspaceType:
*chunk_word_size = Metaspace::first_chunk_word_size();
*class_chunk_word_size = Metaspace::first_class_chunk_word_size();
break;
case Metaspace::ROMetaspaceType:
*chunk_word_size = SharedReadOnlySize / wordSize;
*class_chunk_word_size = ClassSpecializedChunk;
break;
case Metaspace::ReadWriteMetaspaceType:
*chunk_word_size = SharedReadWriteSize / wordSize;
*class_chunk_word_size = ClassSpecializedChunk;
break;
case Metaspace::AnonymousMetaspaceType:
case Metaspace::ReflectionMetaspaceType:
*chunk_word_size = SpecializedChunk;
*class_chunk_word_size = ClassSpecializedChunk;
break;
default:
*chunk_word_size = SmallChunk;
*class_chunk_word_size = ClassSmallChunk;
break;
size_t SpaceManager::adjust_initial_chunk_size(size_t requested, bool is_class_space) {
size_t chunk_sizes[] = {
specialized_chunk_size(is_class_space),
small_chunk_size(is_class_space),
medium_chunk_size(is_class_space)
};
// Adjust up to one of the fixed chunk sizes ...
for (size_t i = 0; i < ARRAY_SIZE(chunk_sizes); i++) {
if (requested <= chunk_sizes[i]) {
return chunk_sizes[i];
}
}
assert(*chunk_word_size != 0 && *class_chunk_word_size != 0,
"Initial chunks sizes bad: data " SIZE_FORMAT
" class " SIZE_FORMAT,
*chunk_word_size, *class_chunk_word_size);
// ... or return the size as a humongous chunk.
return requested;
}
size_t SpaceManager::adjust_initial_chunk_size(size_t requested) const {
return adjust_initial_chunk_size(requested, is_class());
}
size_t SpaceManager::get_initial_chunk_size(Metaspace::MetaspaceType type) const {
size_t requested;
if (is_class()) {
switch (type) {
case Metaspace::BootMetaspaceType: requested = Metaspace::first_class_chunk_word_size(); break;
case Metaspace::ROMetaspaceType: requested = ClassSpecializedChunk; break;
case Metaspace::ReadWriteMetaspaceType: requested = ClassSpecializedChunk; break;
case Metaspace::AnonymousMetaspaceType: requested = ClassSpecializedChunk; break;
case Metaspace::ReflectionMetaspaceType: requested = ClassSpecializedChunk; break;
default: requested = ClassSmallChunk; break;
}
} else {
switch (type) {
case Metaspace::BootMetaspaceType: requested = Metaspace::first_chunk_word_size(); break;
case Metaspace::ROMetaspaceType: requested = SharedReadOnlySize / wordSize; break;
case Metaspace::ReadWriteMetaspaceType: requested = SharedReadWriteSize / wordSize; break;
case Metaspace::AnonymousMetaspaceType: requested = SpecializedChunk; break;
case Metaspace::ReflectionMetaspaceType: requested = SpecializedChunk; break;
default: requested = SmallChunk; break;
}
}
// Adjust to one of the fixed chunk sizes (unless humongous)
const size_t adjusted = adjust_initial_chunk_size(requested);
assert(adjusted != 0, "Incorrect initial chunk size. Requested: "
SIZE_FORMAT " adjusted: " SIZE_FORMAT, requested, adjusted);
return adjusted;
}
size_t SpaceManager::sum_free_in_chunks_in_use() const {
@ -2127,8 +2164,8 @@ MetaWord* SpaceManager::grow_and_allocate(size_t word_size) {
}
// Get another chunk
size_t grow_chunks_by_words = calc_chunk_size(word_size);
Metachunk* next = get_new_chunk(word_size, grow_chunks_by_words);
size_t chunk_word_size = calc_chunk_size(word_size);
Metachunk* next = get_new_chunk(chunk_word_size);
MetaWord* mem = NULL;
@ -2338,22 +2375,18 @@ const char* SpaceManager::chunk_size_name(ChunkIndex index) const {
}
ChunkIndex ChunkManager::list_index(size_t size) {
switch (size) {
case SpecializedChunk:
assert(SpecializedChunk == ClassSpecializedChunk,
"Need branch for ClassSpecializedChunk");
return SpecializedIndex;
case SmallChunk:
case ClassSmallChunk:
return SmallIndex;
case MediumChunk:
case ClassMediumChunk:
return MediumIndex;
default:
assert(size > MediumChunk || size > ClassMediumChunk,
"Not a humongous chunk");
return HumongousIndex;
if (free_chunks(SpecializedIndex)->size() == size) {
return SpecializedIndex;
}
if (free_chunks(SmallIndex)->size() == size) {
return SmallIndex;
}
if (free_chunks(MediumIndex)->size() == size) {
return MediumIndex;
}
assert(size > free_chunks(MediumIndex)->size(), "Not a humongous chunk");
return HumongousIndex;
}
void SpaceManager::deallocate(MetaWord* p, size_t word_size) {
@ -2377,7 +2410,7 @@ void SpaceManager::add_chunk(Metachunk* new_chunk, bool make_current) {
// Find the correct list and set the current
// chunk for that list.
ChunkIndex index = ChunkManager::list_index(new_chunk->word_size());
ChunkIndex index = chunk_manager()->list_index(new_chunk->word_size());
if (index != HumongousIndex) {
retire_current_chunk();
@ -2427,14 +2460,12 @@ void SpaceManager::retire_current_chunk() {
}
}
Metachunk* SpaceManager::get_new_chunk(size_t word_size,
size_t grow_chunks_by_words) {
Metachunk* SpaceManager::get_new_chunk(size_t chunk_word_size) {
// Get a chunk from the chunk freelist
Metachunk* next = chunk_manager()->chunk_freelist_allocate(grow_chunks_by_words);
Metachunk* next = chunk_manager()->chunk_freelist_allocate(chunk_word_size);
if (next == NULL) {
next = vs_list()->get_new_chunk(word_size,
grow_chunks_by_words,
next = vs_list()->get_new_chunk(chunk_word_size,
medium_chunk_bunch());
}
@ -3172,7 +3203,7 @@ void Metaspace::initialize_class_space(ReservedSpace rs) {
SIZE_FORMAT " != " SIZE_FORMAT, rs.size(), CompressedClassSpaceSize);
assert(using_class_space(), "Must be using class space");
_class_space_list = new VirtualSpaceList(rs);
_chunk_manager_class = new ChunkManager(SpecializedChunk, ClassSmallChunk, ClassMediumChunk);
_chunk_manager_class = new ChunkManager(ClassSpecializedChunk, ClassSmallChunk, ClassMediumChunk);
if (!_class_space_list->initialization_succeeded()) {
vm_exit_during_initialization("Failed to setup compressed class space virtual space list.");
@ -3342,75 +3373,62 @@ void Metaspace::post_initialize() {
MetaspaceGC::post_initialize();
}
Metachunk* Metaspace::get_initialization_chunk(MetadataType mdtype,
size_t chunk_word_size,
size_t chunk_bunch) {
void Metaspace::initialize_first_chunk(MetaspaceType type, MetadataType mdtype) {
Metachunk* chunk = get_initialization_chunk(type, mdtype);
if (chunk != NULL) {
// Add to this manager's list of chunks in use and current_chunk().
get_space_manager(mdtype)->add_chunk(chunk, true);
}
}
Metachunk* Metaspace::get_initialization_chunk(MetaspaceType type, MetadataType mdtype) {
size_t chunk_word_size = get_space_manager(mdtype)->get_initial_chunk_size(type);
// Get a chunk from the chunk freelist
Metachunk* chunk = get_chunk_manager(mdtype)->chunk_freelist_allocate(chunk_word_size);
if (chunk != NULL) {
return chunk;
if (chunk == NULL) {
chunk = get_space_list(mdtype)->get_new_chunk(chunk_word_size,
get_space_manager(mdtype)->medium_chunk_bunch());
}
return get_space_list(mdtype)->get_new_chunk(chunk_word_size, chunk_word_size, chunk_bunch);
// For dumping shared archive, report error if allocation has failed.
if (DumpSharedSpaces && chunk == NULL) {
report_insufficient_metaspace(MetaspaceAux::committed_bytes() + chunk_word_size * BytesPerWord);
}
return chunk;
}
void Metaspace::verify_global_initialization() {
assert(space_list() != NULL, "Metadata VirtualSpaceList has not been initialized");
assert(chunk_manager_metadata() != NULL, "Metadata ChunkManager has not been initialized");
if (using_class_space()) {
assert(class_space_list() != NULL, "Class VirtualSpaceList has not been initialized");
assert(chunk_manager_class() != NULL, "Class ChunkManager has not been initialized");
}
}
void Metaspace::initialize(Mutex* lock, MetaspaceType type) {
verify_global_initialization();
assert(space_list() != NULL,
"Metadata VirtualSpaceList has not been initialized");
assert(chunk_manager_metadata() != NULL,
"Metadata ChunkManager has not been initialized");
// Allocate SpaceManager for metadata objects.
_vsm = new SpaceManager(NonClassType, lock);
if (_vsm == NULL) {
return;
}
size_t word_size;
size_t class_word_size;
vsm()->get_initial_chunk_sizes(type, &word_size, &class_word_size);
if (using_class_space()) {
assert(class_space_list() != NULL,
"Class VirtualSpaceList has not been initialized");
assert(chunk_manager_class() != NULL,
"Class ChunkManager has not been initialized");
// Allocate SpaceManager for classes.
_class_vsm = new SpaceManager(ClassType, lock);
if (_class_vsm == NULL) {
return;
}
}
MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
// Allocate chunk for metadata objects
Metachunk* new_chunk = get_initialization_chunk(NonClassType,
word_size,
vsm()->medium_chunk_bunch());
// For dumping shared archive, report error if allocation has failed.
if (DumpSharedSpaces && new_chunk == NULL) {
report_insufficient_metaspace(MetaspaceAux::committed_bytes() + word_size * BytesPerWord);
}
assert(!DumpSharedSpaces || new_chunk != NULL, "should have enough space for both chunks");
if (new_chunk != NULL) {
// Add to this manager's list of chunks in use and current_chunk().
vsm()->add_chunk(new_chunk, true);
}
initialize_first_chunk(type, NonClassType);
// Allocate chunk for class metadata objects
if (using_class_space()) {
Metachunk* class_chunk = get_initialization_chunk(ClassType,
class_word_size,
class_vsm()->medium_chunk_bunch());
if (class_chunk != NULL) {
class_vsm()->add_chunk(class_chunk, true);
} else {
// For dumping shared archive, report error if allocation has failed.
if (DumpSharedSpaces) {
report_insufficient_metaspace(MetaspaceAux::committed_bytes() + class_word_size * BytesPerWord);
}
}
initialize_first_chunk(type, ClassType);
}
_alloc_record_head = NULL;
@ -3836,7 +3854,7 @@ class TestMetaspaceAuxTest : AllStatic {
// vm_allocation_granularity aligned on Windows.
size_t large_size = (size_t)(2*256*K + (os::vm_page_size()/BytesPerWord));
large_size += (os::vm_page_size()/BytesPerWord);
vs_list->get_new_chunk(large_size, large_size, 0);
vs_list->get_new_chunk(large_size, 0);
}
static void test() {
@ -4013,4 +4031,91 @@ void TestVirtualSpaceNode_test() {
TestVirtualSpaceNodeTest::test();
TestVirtualSpaceNodeTest::test_is_available();
}
// The following test is placed here instead of a gtest / unittest file
// because the ChunkManager class is only available in this file.
void ChunkManager_test_list_index() {
ChunkManager manager(ClassSpecializedChunk, ClassSmallChunk, ClassMediumChunk);
// Test previous bug where a query for a humongous class metachunk,
// incorrectly matched the non-class medium metachunk size.
{
assert(MediumChunk > ClassMediumChunk, "Precondition for test");
ChunkIndex index = manager.list_index(MediumChunk);
assert(index == HumongousIndex,
"Requested size is larger than ClassMediumChunk,"
" so should return HumongousIndex. Got index: %d", (int)index);
}
// Check the specified sizes as well.
{
ChunkIndex index = manager.list_index(ClassSpecializedChunk);
assert(index == SpecializedIndex, "Wrong index returned. Got index: %d", (int)index);
}
{
ChunkIndex index = manager.list_index(ClassSmallChunk);
assert(index == SmallIndex, "Wrong index returned. Got index: %d", (int)index);
}
{
ChunkIndex index = manager.list_index(ClassMediumChunk);
assert(index == MediumIndex, "Wrong index returned. Got index: %d", (int)index);
}
{
ChunkIndex index = manager.list_index(ClassMediumChunk + 1);
assert(index == HumongousIndex, "Wrong index returned. Got index: %d", (int)index);
}
}
// The following test is placed here instead of a gtest / unittest file
// because the ChunkManager class is only available in this file.
class SpaceManagerTest : AllStatic {
friend void SpaceManager_test_adjust_initial_chunk_size();
static void test_adjust_initial_chunk_size(bool is_class) {
const size_t smallest = SpaceManager::smallest_chunk_size(is_class);
const size_t normal = SpaceManager::small_chunk_size(is_class);
const size_t medium = SpaceManager::medium_chunk_size(is_class);
#define test_adjust_initial_chunk_size(value, expected, is_class_value) \
do { \
size_t v = value; \
size_t e = expected; \
assert(SpaceManager::adjust_initial_chunk_size(v, (is_class_value)) == e, \
"Expected: " SIZE_FORMAT " got: " SIZE_FORMAT, e, v); \
} while (0)
// Smallest (specialized)
test_adjust_initial_chunk_size(1, smallest, is_class);
test_adjust_initial_chunk_size(smallest - 1, smallest, is_class);
test_adjust_initial_chunk_size(smallest, smallest, is_class);
// Small
test_adjust_initial_chunk_size(smallest + 1, normal, is_class);
test_adjust_initial_chunk_size(normal - 1, normal, is_class);
test_adjust_initial_chunk_size(normal, normal, is_class);
// Medium
test_adjust_initial_chunk_size(normal + 1, medium, is_class);
test_adjust_initial_chunk_size(medium - 1, medium, is_class);
test_adjust_initial_chunk_size(medium, medium, is_class);
// Humongous
test_adjust_initial_chunk_size(medium + 1, medium + 1, is_class);
#undef test_adjust_initial_chunk_size
}
static void test_adjust_initial_chunk_size() {
test_adjust_initial_chunk_size(false);
test_adjust_initial_chunk_size(true);
}
};
void SpaceManager_test_adjust_initial_chunk_size() {
SpaceManagerTest::test_adjust_initial_chunk_size();
}
#endif
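Both the rewritten ChunkManager::list_index and SpaceManager::adjust_initial_chunk_size above follow the same shape: compare a size against the three fixed chunk sizes in ascending order and fall through to the humongous case. The standalone sketch below captures that shape; the word sizes and the enum are placeholders, not the real chunk geometry.

#include <cstddef>

enum ToyChunkIndex { SpecializedIx, SmallIx, MediumIx, HumongousIx };

const size_t toy_chunk_words[3] = { 128, 512, 8 * 1024 };   // assumed sizes, in words

// list_index(): an exact match against one of the free-list sizes, otherwise humongous.
ToyChunkIndex list_index_sketch(size_t word_size) {
  for (int i = 0; i < 3; i++) {
    if (word_size == toy_chunk_words[i]) return static_cast<ToyChunkIndex>(i);
  }
  return HumongousIx;
}

// adjust_initial_chunk_size(): round up to the smallest fixed size that fits,
// or keep the requested size and treat the chunk as humongous.
size_t adjust_initial_chunk_size_sketch(size_t requested_words) {
  for (int i = 0; i < 3; i++) {
    if (requested_words <= toy_chunk_words[i]) return toy_chunk_words[i];
  }
  return requested_words;
}

This is also the shape the ChunkManager_test_list_index checks above exercise: querying a class-space manager for the non-class MediumChunk size matches none of its three list sizes and falls through to HumongousIndex.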

View File

@ -105,14 +105,15 @@ class Metaspace : public CHeapObj<mtClass> {
};
private:
static void verify_global_initialization();
void initialize(Mutex* lock, MetaspaceType type);
// Get the first chunk for a Metaspace. Used for
// Initialize the first chunk for a Metaspace. Used for
// special cases such as the boot class loader, reflection
// class loader and anonymous class loader.
Metachunk* get_initialization_chunk(MetadataType mdtype,
size_t chunk_word_size,
size_t chunk_bunch);
void initialize_first_chunk(MetaspaceType type, MetadataType mdtype);
Metachunk* get_initialization_chunk(MetaspaceType type, MetadataType mdtype);
// Align up the word size to the allocation word size
static size_t align_word_size_up(size_t);
@ -139,6 +140,10 @@ class Metaspace : public CHeapObj<mtClass> {
SpaceManager* _class_vsm;
SpaceManager* class_vsm() const { return _class_vsm; }
SpaceManager* get_space_manager(MetadataType mdtype) {
assert(mdtype != MetadataTypeCount, "MetadataTypeCount can't be used as mdtype");
return mdtype == ClassType ? class_vsm() : vsm();
}
// Allocate space for metadata of type mdtype. This is space
// within a Metachunk and is used by

View File

@ -205,7 +205,7 @@ private:
// Adapter blob (i2c/c2i) for this Method*. Set once when method is linked.
union {
AdapterHandlerEntry* _adapter;
AdapterHandlerEntry** _adapter_trampoline;
AdapterHandlerEntry** _adapter_trampoline; // see comments around Method::link_method()
};
int _constMethod_size;

View File

@ -953,34 +953,103 @@ void Method::unlink_method() {
}
#endif
/****************************************************************************
// The following illustrates how the entries work for CDS shared Methods:
//
// Our goal is to delay writing into a shared Method until it's compiled.
// Hence, we want to determine the initial values for _i2i_entry,
// _from_interpreted_entry and _from_compiled_entry during CDS dump time.
//
// In this example, both Methods A and B have the _i2i_entry of "zero_locals".
// They also have similar signatures so that they will share the same
// AdapterHandlerEntry.
//
// _adapter_trampoline points to a fixed location in the RW section of
// the CDS archive. This location initially contains a NULL pointer. When the
// first of method A or B is linked, an AdapterHandlerEntry is allocated
// dynamically, and its c2i/i2c entries are generated.
//
// _i2i_entry and _from_interpreted_entry initially point to the same
// (fixed) location in the CODE section of the CDS archive. This contains
// an unconditional branch to the actual entry for "zero_locals", which is
// generated at run time and may be on an arbitrary address. Thus, the
// unconditional branch is also generated at run time to jump to the correct
// address.
//
// Similarly, _from_compiled_entry points to a fixed address in the CODE
// section. This address has enough space for an unconditional branch
// instruction, and is initially zero-filled. After the AdapterHandlerEntry is
// initialized, and the address for the actual c2i_entry is known, we emit a
// branch instruction here to branch to the actual c2i_entry.
//
// The effect of the extra branch on the i2i and c2i entries is negligible.
//
// The reason for putting _adapter_trampoline in RO is many shared Methods
// share the same AdapterHandlerEntry, so we can save space in the RW section
// by having the extra indirection.
[Method A: RW]
_constMethod ----> [ConstMethod: RO]
_adapter_trampoline -----------+
|
_i2i_entry (same value as method B) |
_from_interpreted_entry (same value as method B) |
_from_compiled_entry (same value as method B) |
|
|
[Method B: RW] +--------+
_constMethod ----> [ConstMethod: RO] |
_adapter_trampoline --+--->(AdapterHandlerEntry* ptr: RW)-+
|
+-------------------------------+
|
+----> [AdapterHandlerEntry] (allocated at run time)
_fingerprint
_c2i_entry ---------------------------------+->[c2i entry..]
_i2i_entry -------------+ _i2c_entry ---------------+-> [i2c entry..] |
_from_interpreted_entry | _c2i_unverified_entry | |
| | | |
| | (_cds_entry_table: CODE) | |
| +->[0]: jmp _entry_table[0] --> (i2i_entry_for "zero_locals") | |
| | (allocated at run time) | |
| | ... [asm code ...] | |
+-[not compiled]-+ [n]: jmp _entry_table[n] | |
| | |
| | |
+-[compiled]-------------------------------------------------------------------+ |
|
_from_compiled_entry------------> (_c2i_entry_trampoline: CODE) |
[jmp c2i_entry] ------------------------------------------------------+
***/
// Called when the method_holder is getting linked. Setup entrypoints so the method
// is ready to be called from interpreter, compiler, and vtables.
void Method::link_method(const methodHandle& h_method, TRAPS) {
// If the code cache is full, we may reenter this function for the
// leftover methods that weren't linked.
if (is_shared()) {
if (adapter() != NULL) return;
} else {
if (_i2i_entry != NULL) return;
assert(adapter() == NULL, "init'd to NULL" );
address entry = Interpreter::entry_for_cds_method(h_method);
assert(entry != NULL && entry == _i2i_entry,
"should be correctly set during dump time");
if (adapter() != NULL) {
return;
}
assert(entry == _from_interpreted_entry,
"should be correctly set during dump time");
} else if (_i2i_entry != NULL) {
return;
}
assert( _code == NULL, "nothing compiled yet" );
// Setup interpreter entrypoint
assert(this == h_method(), "wrong h_method()" );
address entry;
if (this->is_shared()) {
entry = Interpreter::entry_for_cds_method(h_method);
} else {
entry = Interpreter::entry_for_method(h_method);
}
assert(entry != NULL, "interpreter entry must be non-null");
if (is_shared()) {
assert(entry == _i2i_entry && entry == _from_interpreted_entry,
"should be correctly set during dump time");
} else {
if (!is_shared()) {
assert(adapter() == NULL, "init'd to NULL");
address entry = Interpreter::entry_for_method(h_method);
assert(entry != NULL, "interpreter entry must be non-null");
// Sets both _i2i_entry and _from_interpreted_entry
set_interpreter_entry(entry);
}
@ -1024,7 +1093,7 @@ address Method::make_adapters(methodHandle mh, TRAPS) {
if (mh->is_shared()) {
assert(mh->adapter() == adapter, "must be");
assert(mh->_from_compiled_entry != NULL, "must be"); // FIXME, the instructions also not NULL
assert(mh->_from_compiled_entry != NULL, "must be");
} else {
mh->set_adapter_entry(adapter);
mh->_from_compiled_entry = adapter->get_c2i_entry();
@ -1034,9 +1103,9 @@ address Method::make_adapters(methodHandle mh, TRAPS) {
void Method::restore_unshareable_info(TRAPS) {
// Since restore_unshareable_info can be called more than once for a method, don't
// redo any work. If this field is restored, there is nothing to do.
if (_from_compiled_entry == NULL) {
// restore method's vtable by calling a virtual function
// redo any work.
if (adapter() == NULL) {
// Restore Method's C++ vtable by calling a virtual function
restore_vtable();
methodHandle mh(THREAD, this);
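The diagram and the reworked link_method() above revolve around one idea: many archived methods with the same signature share a single writable pointer slot (the adapter trampoline), which starts out NULL and is filled with a lazily created AdapterHandlerEntry the first time any of them is linked. The generic sketch below models that lazy publication; the ToyAdapter type, make_adapter(), and the lock-free CAS are assumptions of the sketch, and HotSpot serializes adapter creation differently.

#include <atomic>

struct ToyAdapter { /* c2i and i2c entry points would live here */ };

// Stand-in for run-time adapter generation (the real code emits c2i/i2c stubs).
ToyAdapter* make_adapter() { return new ToyAdapter(); }

// One slot per shared signature; every archived method with that signature
// points at the same slot through its read-only ConstMethod.
struct AdapterSlot {
  std::atomic<ToyAdapter*> adapter{nullptr};
};

// link_method() analogue: the first method to be linked fills the slot, later
// methods observe the published adapter and return early.
ToyAdapter* link_through_trampoline(AdapterSlot* slot) {
  ToyAdapter* a = slot->adapter.load(std::memory_order_acquire);
  if (a != nullptr) {
    return a;                                   // already linked through another method
  }
  ToyAdapter* fresh = make_adapter();
  ToyAdapter* expected = nullptr;
  if (slot->adapter.compare_exchange_strong(expected, fresh,
                                            std::memory_order_release,
                                            std::memory_order_acquire)) {
    return fresh;                               // we published the adapter
  }
  delete fresh;                                 // another thread won the race
  return expected;
}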

View File

@ -5513,7 +5513,7 @@ bool LibraryCallKit::inline_montgomeryMultiply() {
}
assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
const char* stubName = "montgomery_square";
const char* stubName = "montgomery_multiply";
assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");

View File

@ -1742,7 +1742,7 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
// The result of the reduction must not be used in the loop
for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
Node* u = def_node->fast_out(i);
if (has_ctrl(u) && !loop->is_member(get_loop(get_ctrl(u)))) {
if (!loop->is_member(get_loop(ctrl_or_self(u)))) {
continue;
}
if (u == phi) {

View File

@ -1117,8 +1117,8 @@ bool Node::has_special_unique_user() const {
if (this->is_Store()) {
// Condition for back-to-back stores folding.
return n->Opcode() == op && n->in(MemNode::Memory) == this;
} else if (this->is_Load()) {
// Condition for removing an unused LoadNode from the MemBarAcquire precedence input
} else if (this->is_Load() || this->is_DecodeN()) {
// Condition for removing an unused LoadNode or DecodeNNode from the MemBarAcquire precedence input
return n->Opcode() == Op_MemBarAcquire;
} else if (op == Op_AddL) {
// Condition for convL2I(addL(x,y)) ==> addI(convL2I(x),convL2I(y))

View File

@ -378,6 +378,7 @@ static SpecialFlag const special_jvm_flags[] = {
{ "AutoGCSelectPauseMillis", JDK_Version::jdk(9), JDK_Version::undefined(), JDK_Version::jdk(10) },
{ "UseAutoGCSelectPolicy", JDK_Version::jdk(9), JDK_Version::undefined(), JDK_Version::jdk(10) },
{ "UseParNewGC", JDK_Version::jdk(9), JDK_Version::undefined(), JDK_Version::jdk(10) },
{ "ExplicitGCInvokesConcurrentAndUnloadsClasses", JDK_Version::jdk(9), JDK_Version::undefined(), JDK_Version::jdk(10) },
{ "ConvertSleepToYield", JDK_Version::jdk(9), JDK_Version::jdk(10), JDK_Version::jdk(11) },
{ "ConvertYieldToSleep", JDK_Version::jdk(9), JDK_Version::jdk(10), JDK_Version::jdk(11) },
@ -1318,22 +1319,31 @@ bool Arguments::add_property(const char* prop, PropertyWriteable writeable, Prop
#if INCLUDE_CDS
void Arguments::check_unsupported_dumping_properties() {
assert(DumpSharedSpaces, "this function is only used with -Xshare:dump");
const char* unsupported_properties[5] = { "jdk.module.main",
const char* unsupported_properties[] = { "jdk.module.main",
"jdk.module.limitmods",
"jdk.module.path",
"jdk.module.upgrade.path",
"jdk.module.addmods.0",
"jdk.module.limitmods" };
const char* unsupported_options[5] = { "-m",
"jdk.module.addmods.0" };
const char* unsupported_options[] = { "-m",
"--limit-modules",
"--module-path",
"--upgrade-module-path",
"--add-modules",
"--limit-modules" };
"--add-modules" };
assert(ARRAY_SIZE(unsupported_properties) == ARRAY_SIZE(unsupported_options), "must be");
// If a vm option is found in the unsupported_options array with index less than the warning_idx,
// vm will exit with an error message. Otherwise, it will result in a warning message.
uint warning_idx = 2;
SystemProperty* sp = system_properties();
while (sp != NULL) {
for (int i = 0; i < 5; i++) {
for (uint i = 0; i < ARRAY_SIZE(unsupported_properties); i++) {
if (strcmp(sp->key(), unsupported_properties[i]) == 0) {
if (i < warning_idx) {
vm_exit_during_initialization(
"Cannot use the following option when dumping the shared archive", unsupported_options[i]);
} else {
warning(
"the %s option is ignored when dumping the shared archive", unsupported_options[i]);
}
}
}
sp = sp->next();
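In isolation, the pattern in the hunk above is: two parallel arrays whose lengths are tied together with ARRAY_SIZE, plus an index threshold that decides between a hard error and a warning. The sketch below reuses the property and option names from the hunk, but the TOY_ARRAY_SIZE macro and the fprintf reporting are placeholders for vm_exit_during_initialization() and warning().

#include <cstdio>
#include <cstring>

#define TOY_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char* const unsupported_properties[] =
    { "jdk.module.main", "jdk.module.limitmods", "jdk.module.path",
      "jdk.module.upgrade.path", "jdk.module.addmods.0" };
static const char* const unsupported_options[] =
    { "-m", "--limit-modules", "--module-path",
      "--upgrade-module-path", "--add-modules" };

static_assert(TOY_ARRAY_SIZE(unsupported_properties) ==
              TOY_ARRAY_SIZE(unsupported_options), "arrays must stay in sync");

// Entries with an index below warning_idx are fatal for -Xshare:dump; the rest only warn.
void check_property_sketch(const char* key) {
  const unsigned warning_idx = 2;
  for (unsigned i = 0; i < TOY_ARRAY_SIZE(unsupported_properties); i++) {
    if (std::strcmp(key, unsupported_properties[i]) == 0) {
      if (i < warning_idx) {
        std::fprintf(stderr, "error: cannot use %s when dumping the shared archive\n",
                     unsupported_options[i]);
      } else {
        std::fprintf(stderr, "warning: %s is ignored when dumping the shared archive\n",
                     unsupported_options[i]);
      }
    }
  }
}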
