8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
parent c50c083f83
commit 7d56518671
@ -41,13 +41,11 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
|
||||
|
||||
ifeq ("${Platform_arch_model}", "${Platform_arch}")
|
||||
SOURCES.AD = \
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad)
|
||||
else
|
||||
SOURCES.AD = \
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
|
||||
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad)
|
||||
endif
|
||||
|
||||
EXEC = $(OUTDIR)/adlc
|
||||
|
@ -98,7 +98,17 @@ class Argument VALUE_OBJ_CLASS_SPEC {
|
||||
// Only 8 registers may contain integer parameters.
|
||||
n_register_parameters = 8,
|
||||
// Can have up to 8 floating registers.
|
||||
n_float_register_parameters = 8
|
||||
n_float_register_parameters = 8,
|
||||
|
||||
// PPC C calling conventions.
|
||||
// The first eight arguments are passed in int regs if they are int.
|
||||
n_int_register_parameters_c = 8,
|
||||
// The first thirteen float arguments are passed in float regs.
|
||||
n_float_register_parameters_c = 13,
|
||||
// Only the first 8 parameters are not placed on the stack. Aix disassembly
|
||||
// shows that xlC places all float args after argument 8 on the stack AND
|
||||
// in a register. This is not documented, but we follow this convention, too.
|
||||
n_regs_not_on_stack_c = 8,
|
||||
};
|
||||
// creation
|
||||
Argument(int number) : _number(number) {}
|
||||
@ -662,6 +672,14 @@ class Assembler : public AbstractAssembler {
|
||||
bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken,
|
||||
};
|
||||
|
||||
// Elemental Memory Barriers (>=Power 8)
|
||||
enum Elemental_Membar_mask_bits {
|
||||
StoreStore = 1 << 0,
|
||||
StoreLoad = 1 << 1,
|
||||
LoadStore = 1 << 2,
|
||||
LoadLoad = 1 << 3
|
||||
};
|
||||
|
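As an aside on the enum above: elemental_membar() (declared later in this file) appears to OR these bits into the e1215 field of a sync encoding, so acquire- and release-style barriers are plain bit combinations. A minimal standalone C++ sketch of those combinations, assuming the bit values shown above (the acquire/release mask names are mine):

#include <cstdio>

// Mirrors Elemental_Membar_mask_bits from the hunk above (assumed values).
enum Elemental_Membar_mask_bits {
  StoreStore = 1 << 0,
  StoreLoad  = 1 << 1,
  LoadStore  = 1 << 2,
  LoadLoad   = 1 << 3
};

int main() {
  // Release-style: order earlier loads/stores before a later store.
  int release_mask = LoadStore | StoreStore;   // == 5
  // Acquire-style: order an earlier load before later loads/stores.
  int acquire_mask = LoadLoad | LoadStore;     // == 12
  // elemental_membar(e) asserts 0 < e && e < 16: at least one bit set, at most all four.
  std::printf("release = %d, acquire = %d\n", release_mask, acquire_mask);
  return 0;
}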
||||
// Branch prediction hints.
|
||||
inline static int add_bhint_to_boint(const int bhint, const int boint) {
|
||||
switch (boint) {
|
||||
@ -753,17 +771,6 @@ class Assembler : public AbstractAssembler {
|
||||
|
||||
enum Predict { pt = 1, pn = 0 }; // pt = predict taken
|
||||
|
||||
enum Membar_mask_bits { // page 184, v9
|
||||
StoreStore = 1 << 3,
|
||||
LoadStore = 1 << 2,
|
||||
StoreLoad = 1 << 1,
|
||||
LoadLoad = 1 << 0,
|
||||
|
||||
Sync = 1 << 6,
|
||||
MemIssue = 1 << 5,
|
||||
Lookaside = 1 << 4
|
||||
};
|
||||
|
||||
// instruction must start at passed address
|
||||
static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
|
||||
|
||||
@ -875,19 +882,20 @@ class Assembler : public AbstractAssembler {
|
||||
#define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
|
||||
// Extract instruction fields from instruction words.
|
||||
public:
|
||||
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
|
||||
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
|
||||
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
|
||||
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
|
||||
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); }
|
||||
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
// Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0.
|
||||
// Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0.
|
||||
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
|
||||
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
|
||||
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
|
||||
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
|
||||
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
|
||||
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
|
||||
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
|
||||
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
|
||||
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
|
||||
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
|
||||
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
|
||||
|
||||
#define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
|
||||
#define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
|
||||
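For readers unfamiliar with Power's bit numbering, bit 0 is the most significant bit of the 32-bit instruction word, so the inv_opp_*_field accessors above only translate a hi_bit/lo_bit pair into ordinary shift-and-mask arithmetic. A standalone sketch of the same extraction; the field positions (RS 6..10, RA 11..15, UI 16..31) are read off the accessors above, while the ori example encoding is my own illustration:

#include <cassert>
#include <cstdint>

// Extract an unsigned field given Power-style bit numbers (0 = MSB of the 32-bit word).
static uint32_t inv_opp_u_field(uint32_t insn, int hi_bit, int lo_bit) {
  int width = lo_bit - hi_bit + 1;   // e.g. RA occupies bits 11..15, i.e. 5 bits
  int shift = 31 - lo_bit;           // distance of the field's LSB from bit 31
  return (insn >> shift) & ((1u << width) - 1);
}

int main() {
  // ori with RS = 1, RA = 1, UI = 0, the encoding tested by is_endgroup() elsewhere in
  // this file: primary opcode 24 in bits 0..5, RS in 6..10, RA in 11..15, UI in 16..31.
  uint32_t ori_r1_r1_0 = (24u << 26) | (1u << 21) | (1u << 16);
  assert(inv_opp_u_field(ori_r1_r1_0, 10, 6)  == 1);   // RS field
  assert(inv_opp_u_field(ori_r1_r1_0, 15, 11) == 1);   // RA field
  assert(inv_opp_u_field(ori_r1_r1_0, 31, 16) == 0);   // UI field
  return 0;
}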
@ -925,6 +933,7 @@ class Assembler : public AbstractAssembler {
|
||||
static int l10( int x) { return opp_u_field(x, 10, 10); }
|
||||
static int l15( int x) { return opp_u_field(x, 15, 15); }
|
||||
static int l910( int x) { return opp_u_field(x, 10, 9); }
|
||||
static int e1215( int x) { return opp_u_field(x, 15, 12); }
|
||||
static int lev( int x) { return opp_u_field(x, 26, 20); }
|
||||
static int li( int x) { return opp_s_field(x, 29, 6); }
|
||||
static int lk( int x) { return opp_u_field(x, 31, 31); }
|
||||
@ -960,13 +969,13 @@ class Assembler : public AbstractAssembler {
|
||||
static int sr( int x) { return opp_u_field(x, 15, 12); }
|
||||
static int tbr( int x) { return opp_u_field(x, 20, 11); }
|
||||
static int th( int x) { return opp_u_field(x, 10, 7); }
|
||||
static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); }
|
||||
static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); }
|
||||
static int thct( int x) { assert((x&8) == 0, "must be valid cache specification"); return th(x); }
|
||||
static int thds( int x) { assert((x&8) == 8, "must be valid stream specification"); return th(x); }
|
||||
static int to( int x) { return opp_u_field(x, 10, 6); }
|
||||
static int u( int x) { return opp_u_field(x, 19, 16); }
|
||||
static int ui( int x) { return opp_u_field(x, 31, 16); }
|
||||
|
||||
// support vector instructions for >= Power6
|
||||
// Support vector instructions for >= Power6.
|
||||
static int vra( int x) { return opp_u_field(x, 15, 11); }
|
||||
static int vrb( int x) { return opp_u_field(x, 20, 16); }
|
||||
static int vrc( int x) { return opp_u_field(x, 25, 21); }
|
||||
@ -1090,8 +1099,8 @@ class Assembler : public AbstractAssembler {
|
||||
inline void subfic( Register d, Register a, int si16);
|
||||
inline void add( Register d, Register a, Register b);
|
||||
inline void add_( Register d, Register a, Register b);
|
||||
inline void subf( Register d, Register a, Register b);
|
||||
inline void sub( Register d, Register a, Register b);
|
||||
inline void subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec.
|
||||
inline void sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability.
|
||||
inline void subf_( Register d, Register a, Register b);
|
||||
inline void addc( Register d, Register a, Register b);
|
||||
inline void addc_( Register d, Register a, Register b);
|
||||
@ -1204,7 +1213,7 @@ class Assembler : public AbstractAssembler {
|
||||
}
|
||||
// endgroup opcode for Power6
|
||||
static bool is_endgroup(int x) {
|
||||
return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
|
||||
return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
|
||||
}
|
||||
|
||||
|
||||
@ -1227,9 +1236,13 @@ class Assembler : public AbstractAssembler {
|
||||
inline void cmpld( ConditionRegister crx, Register a, Register b);
|
||||
|
||||
inline void isel( Register d, Register a, Register b, int bc);
|
||||
// Convenient version which takes: Condition register, Condition code and invert flag. Omit b to keep old value.
|
||||
inline void isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg);
|
||||
// Set d = 0 if (cr.cc) equals 1, otherwise b.
|
||||
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
|
||||
|
||||
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
|
||||
void andi( Register a, Register s, int ui16); // optimized version
|
||||
void andi( Register a, Register s, int ui16); // optimized version
|
||||
inline void andi_( Register a, Register s, int ui16);
|
||||
inline void andis_( Register a, Register s, int ui16);
|
||||
inline void ori( Register a, Register s, int ui16);
|
||||
@ -1553,10 +1566,7 @@ class Assembler : public AbstractAssembler {
|
||||
inline void ptesync();
|
||||
inline void eieio();
|
||||
inline void isync();
|
||||
|
||||
inline void release();
|
||||
inline void acquire();
|
||||
inline void fence();
|
||||
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
|
||||
|
||||
// atomics
|
||||
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
|
||||
@ -1938,7 +1948,7 @@ class Assembler : public AbstractAssembler {
|
||||
inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
|
||||
|
||||
// Load a 64 bit constant, optimized, not identifiable.
|
||||
// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
|
||||
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
|
||||
// 16 bit immediate offset. This is useful if the offset can be encoded in
|
||||
// a succeeding instruction.
|
||||
int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);
|
||||
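The return_simm16_rest comment above can be made concrete with a small sketch: the idea is to materialize only a 16-bit-aligned base of the constant and hand back a signed 16-bit remainder that a following d-form instruction (addi, ld, ...) can fold into its displacement. This illustrates the arithmetic only, not the instruction selection the assembler actually performs; the helper name is mine:

#include <cassert>
#include <cstdint>

// Split a 64-bit constant into a base (materialized in a register) and a signed
// 16-bit rest that a subsequent d-form instruction can supply as its immediate.
static int64_t split_simm16_rest(int64_t value, int64_t* base) {
  int16_t rest = (int16_t)(value & 0xffff);   // sign-extended low 16 bits
  *base = value - rest;                       // always a multiple of 0x10000
  return rest;
}

int main() {
  int64_t base, rest;
  rest = split_simm16_rest(0x123456789ABCLL, &base);
  assert(base + rest == 0x123456789ABCLL && base % 0x10000 == 0);
  assert(rest >= -32768 && rest <= 32767);
  rest = split_simm16_rest(-12345LL, &base);
  assert(base + rest == -12345LL && base % 0x10000 == 0);
  return 0;
}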
|
@ -224,8 +224,12 @@ inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) {
|
||||
inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); }
|
||||
// testbit with condition register.
|
||||
inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) {
|
||||
Assembler::rldicr(a, s, 63-ui6, 0);
|
||||
Assembler::cmpdi(cr, a, 0);
|
||||
if (cr == CCR0) {
|
||||
Assembler::rldicr_(a, s, 63-ui6, 0);
|
||||
} else {
|
||||
Assembler::rldicr(a, s, 63-ui6, 0);
|
||||
Assembler::cmpdi(cr, a, 0);
|
||||
}
|
||||
}
|
||||
|
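The CCR0 special case above works because the record form rldicr_ already sets CR0 from the rotated result, so the separate cmpdi is only needed for other condition registers. Either way the bit test itself is the same: rotate the selected bit into the most significant position and clear the rest, so the sign/zero state of the result encodes the bit. A standalone sketch of that arithmetic (helper name mine; ui6 is the usual bit index where bit 0 has value 1):

#include <cassert>
#include <cstdint>

// Mirrors rldicr(a, s, 63-ui6, 0): rotate the tested bit into the MSB and clear all other bits.
static int64_t rotate_and_isolate(uint64_t s, int ui6) {
  int sh = 63 - ui6;                                    // rotate amount used above
  uint64_t rotated = (s << sh) | (sh ? (s >> (64 - sh)) : 0);
  return (int64_t)(rotated & 0x8000000000000000ULL);    // keep only the MSB
}

int main() {
  // A subsequent cmpdi(cr, a, 0) (or the record form directly) then sees
  // "less than zero" when the bit was set and "equal to zero" when it was clear.
  assert(rotate_and_isolate(0x10, 4) < 0);              // bit 4 set   -> LT
  assert(rotate_and_isolate(0x10, 5) == 0);             // bit 5 clear -> EQ
  assert(rotate_and_isolate(1ULL << 63, 63) < 0);       // the sign bit itself
  return 0;
}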
||||
// rotate instructions
|
||||
@ -423,6 +427,27 @@ inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE
|
||||
inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
|
||||
inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
|
||||
|
||||
// Conditional move (>= Power7)
|
||||
inline void Assembler::isel(Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b) {
|
||||
if (b == noreg) {
|
||||
b = d; // Can be omitted if old value should be kept in "else" case.
|
||||
}
|
||||
Register first = a;
|
||||
Register second = b;
|
||||
if (inv) {
|
||||
first = b;
|
||||
second = a; // exchange
|
||||
}
|
||||
assert(first != R0, "r0 not allowed");
|
||||
isel(d, first, second, bi0(cr, cc));
|
||||
}
|
||||
inline void Assembler::isel_0(Register d, ConditionRegister cr, Condition cc, Register b) {
|
||||
if (b == noreg) {
|
||||
b = d; // Can be omitted if old value should be kept in "else" case.
|
||||
}
|
||||
isel(d, R0, b, bi0(cr, cc));
|
||||
}
|
||||
|
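The two wrappers above boil the raw isel encoding down to a C-style conditional move. A minimal sketch of the semantics they implement, read off the comments and the inv/noreg handling above (plain C++, not HotSpot code):

#include <cassert>
#include <cstdint>

// d = condition ? a : b, with 'inv' swapping the two sources, as in isel(d, cr, cc, inv, a, b).
static int64_t isel_like(bool condition, bool inv, int64_t a, int64_t b) {
  if (inv) { int64_t t = a; a = b; b = t; }   // exchange, as in the wrapper above
  return condition ? a : b;
}

// d = condition ? 0 : b, as in isel_0: the "true" operand is hard-wired to R0,
// which the isel instruction reads as the constant zero.
static int64_t isel_0_like(bool condition, int64_t b) {
  return condition ? 0 : b;
}

int main() {
  assert(isel_like(true,  false, 7, 9) == 7);
  assert(isel_like(true,  true,  7, 9) == 9);   // inverted: sources swapped
  assert(isel_0_like(true,  42) == 0);
  assert(isel_0_like(false, 42) == 42);
  return 0;
}

Omitting b (noreg) makes both wrappers keep the old value of d in the "else" case, as the comments above note.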
||||
// PPC 2, section 3.2.1 Instruction Cache Instructions
|
||||
inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); }
|
||||
// PPC 2, section 3.2.2 Data Cache Instructions
|
||||
@ -445,10 +470,7 @@ inline void Assembler::lwsync() { Assembler::sync(1); }
|
||||
inline void Assembler::ptesync() { Assembler::sync(2); }
|
||||
inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
|
||||
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
|
||||
|
||||
inline void Assembler::release() { Assembler::lwsync(); }
|
||||
inline void Assembler::acquire() { Assembler::lwsync(); }
|
||||
inline void Assembler::fence() { Assembler::sync(); }
|
||||
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
|
||||
|
||||
// atomics
|
||||
// Use ra0mem to disallow R0 as base.
|
||||
@ -767,7 +789,6 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
|
||||
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
|
||||
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
|
||||
|
||||
|
||||
inline void Assembler::load_const(Register d, void* x, Register tmp) {
|
||||
load_const(d, (long)x, tmp);
|
||||
}
|
||||
|
@ -100,6 +100,7 @@ public:
|
||||
#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
|
||||
#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
|
||||
#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
|
||||
((VMJavaVal64*)(addr))->d)
|
||||
|
||||
|
||||
#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
|
||||
|
@ -33,7 +33,7 @@ class Bytes: AllStatic {
|
||||
// Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
|
||||
// PowerPC needs to check for alignment.
|
||||
|
||||
// can I count on address always being a pointer to an unsigned char? Yes
|
||||
// Can I count on address always being a pointer to an unsigned char? Yes.
|
||||
|
||||
// Returns true if the byte ordering used by Java is different from the native byte ordering
|
||||
// of the underlying machine. For example, true for Intel x86, false for Solaris on SPARC.
|
||||
@ -141,7 +141,6 @@ class Bytes: AllStatic {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
|
||||
// (no byte-order reversal is needed since Power CPUs are big-endian oriented).
|
||||
static inline u2 get_Java_u2(address p) { return get_native_u2(p); }
|
||||
|
hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp (new file, 98 lines)
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2012, 2013 SAP AG. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_PPC_VM_C2_GLOBALS_PPC_HPP
|
||||
#define CPU_PPC_VM_C2_GLOBALS_PPC_HPP
|
||||
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// Sets the default values for platform dependent flags used by the server compiler.
|
||||
// (see c2_globals.hpp).
|
||||
|
||||
define_pd_global(bool, BackgroundCompilation, true);
|
||||
define_pd_global(bool, CICompileOSR, true);
|
||||
define_pd_global(bool, InlineIntrinsics, true);
|
||||
define_pd_global(bool, PreferInterpreterNativeStubs, false);
|
||||
define_pd_global(bool, ProfileTraps, true);
|
||||
define_pd_global(bool, UseOnStackReplacement, true);
|
||||
define_pd_global(bool, ProfileInterpreter, true);
|
||||
define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 10000);
|
||||
define_pd_global(intx, BackEdgeThreshold, 140000);
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 140);
|
||||
define_pd_global(intx, ConditionalMoveLimit, 3);
|
||||
define_pd_global(intx, FLOATPRESSURE, 28);
|
||||
define_pd_global(intx, FreqInlineSize, 175);
|
||||
define_pd_global(intx, MinJumpTableSize, 10);
|
||||
define_pd_global(intx, INTPRESSURE, 25);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16);
|
||||
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, RegisterCostAreaRatio, 16000);
|
||||
define_pd_global(bool, UseTLAB, true);
|
||||
define_pd_global(bool, ResizeTLAB, true);
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
|
||||
// Peephole and CISC spilling both break the graph, and so make the
|
||||
// scheduler sick.
|
||||
define_pd_global(bool, OptoPeephole, false);
|
||||
define_pd_global(bool, UseCISCSpill, false);
|
||||
define_pd_global(bool, OptoBundling, false);
|
||||
// GL:
|
||||
// Detected a problem with unscaled compressed oops and
|
||||
// narrow_oop_use_complex_address() == false.
|
||||
// -Djava.io.tmpdir=./tmp -jar SPECjvm2008.jar -ikv -wt 3 -it 3
|
||||
// -bt 1 --base compiler.sunflow
|
||||
// fails in Lower.visitIf->translate->tranlate->translate and
|
||||
// throws an unexpected NPE. A load and a store seem to be
|
||||
// reordered. Java reads about:
|
||||
// loc = x.f
|
||||
// x.f = 0
|
||||
// NullCheck loc
|
||||
// While assembler reads:
|
||||
// x.f = 0
|
||||
// loc = x.f
|
||||
// NullCheck loc
|
||||
define_pd_global(bool, OptoScheduling, false);
|
||||
|
||||
define_pd_global(intx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
|
||||
define_pd_global(intx, ReservedCodeCacheSize, 256*M);
|
||||
define_pd_global(intx, CodeCacheExpansionSize, 64*K);
|
||||
|
||||
// Ergonomics related flags
|
||||
define_pd_global(uint64_t,MaxRAM, 4ULL*G);
|
||||
define_pd_global(uintx, CodeCacheMinBlockLength, 4);
|
||||
define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
|
||||
|
||||
define_pd_global(bool, TrapBasedRangeChecks, false);
|
||||
|
||||
// Heap related flags
|
||||
define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
|
||||
|
||||
// Ergonomics related flags
|
||||
define_pd_global(bool, NeverActAsServerClassMachine, false);
|
||||
|
||||
#endif // CPU_PPC_VM_C2_GLOBALS_PPC_HPP
|
hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp (new file, 48 lines)
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2012, 2013 SAP AG. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "opto/compile.hpp"
|
||||
#include "opto/node.hpp"
|
||||
#include "runtime/globals.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
|
||||
// processor dependent initialization for ppc
|
||||
|
||||
void Compile::pd_compiler2_init() {
|
||||
|
||||
// Power7 and later
|
||||
if (PowerArchitecturePPC64 > 6) {
|
||||
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
|
||||
FLAG_SET_ERGO(bool, UsePopCountInstruction, true);
|
||||
}
|
||||
}
|
||||
|
||||
if (PowerArchitecturePPC64 == 6) {
|
||||
if (FLAG_IS_DEFAULT(InsertEndGroupPPC64)) {
|
||||
FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
|
||||
}
|
||||
}
|
||||
}
|
@ -105,10 +105,12 @@ static void copy_conjoint_atomic(T* from, T* to, size_t count) {
|
||||
}
|
||||
|
||||
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
|
||||
// TODO: contribute optimized version.
|
||||
copy_conjoint_atomic<jshort>(from, to, count);
|
||||
}
|
||||
|
||||
static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
|
||||
// TODO: contribute optimized version.
|
||||
copy_conjoint_atomic<jint>(from, to, count);
|
||||
}
|
||||
|
||||
@ -125,10 +127,12 @@ static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count
|
||||
}
|
||||
|
||||
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
|
||||
// TODO: contribute optimized version.
|
||||
pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
|
||||
}
|
||||
|
||||
static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
|
||||
// TODO: contribute optimized version.
|
||||
pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
|
||||
}
|
||||
|
||||
|
@ -1981,8 +1981,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
|
||||
|
||||
// Restore R14_state.
|
||||
__ ld(R14_state, 0, R1_SP);
|
||||
__ addi(R14_state, R14_state,
|
||||
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
|
||||
//
|
||||
// Registers alive
|
||||
|
@ -176,13 +176,14 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
|
||||
Method* method = interpreter_frame_method();
|
||||
BasicType type = method->result_type();
|
||||
|
||||
#ifdef CC_INTERP
|
||||
if (method->is_native()) {
|
||||
// Prior to calling into the runtime to notify the method exit the possible
|
||||
// result value is saved into the interpreter frame.
|
||||
#ifdef CC_INTERP
|
||||
interpreterState istate = get_interpreterState();
|
||||
address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset());
|
||||
address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset());
|
||||
#endif
|
||||
|
||||
switch (method->result_type()) {
|
||||
case T_OBJECT:
|
||||
@ -226,9 +227,6 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
|
||||
default : ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
#else
|
||||
Unimplemented();
|
||||
#endif
|
||||
return type;
|
||||
}
|
||||
|
||||
|
@ -421,7 +421,7 @@
|
||||
#ifdef CC_INTERP
|
||||
// Additional interface for interpreter frames:
|
||||
inline interpreterState get_interpreterState() const;
|
||||
#endif
|
||||
#endif // CC_INTERP
|
||||
|
||||
// Size of a monitor in bytes.
|
||||
static int interpreter_frame_monitor_size_in_bytes();
|
||||
@ -431,7 +431,6 @@
|
||||
|
||||
private:
|
||||
|
||||
// PPC port: permgen stuff
|
||||
ConstantPoolCache** interpreter_frame_cpoolcache_addr() const;
|
||||
|
||||
public:
|
||||
|
@ -78,11 +78,8 @@ inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp)
|
||||
// can distinguish identity and younger/older relationship. NULL
|
||||
// represents an invalid (incomparable) frame.
|
||||
inline intptr_t* frame::id(void) const {
|
||||
// Use the _unextended_pc as the frame's ID. Because we have no
|
||||
// adapters, but resized compiled frames, some of the new code
|
||||
// (e.g. JVMTI) wouldn't work if we return the (current) SP of the
|
||||
// frame.
|
||||
return _unextended_sp;
|
||||
// Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
|
||||
return _fp;
|
||||
}
|
||||
|
||||
// Return true if this frame is older (less recent activation) than
|
||||
|
@ -62,6 +62,13 @@ define_pd_global(uintx, TypeProfileLevel, 0);
|
||||
|
||||
// Platform dependent flag handling: flags only defined on this platform.
|
||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
|
||||
\
|
||||
/* Load poll address from thread. This is used to implement per-thread */ \
|
||||
/* safepoints on platforms != IA64. */ \
|
||||
product(bool, LoadPollAddressFromThread, false, \
|
||||
"Load polling page address from thread object (required for " \
|
||||
"per-thread safepoints on platforms != IA64)") \
|
||||
\
|
||||
product(uintx, PowerArchitecturePPC64, 0, \
|
||||
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
|
||||
"Power7. Default is 0. CPUs newer than Power7 will be " \
|
||||
@ -88,6 +95,14 @@ define_pd_global(uintx, TypeProfileLevel, 0);
|
||||
\
|
||||
product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \
|
||||
"Use static branch prediction hints in CAS operations.") \
|
||||
product(bool, UseStaticBranchPredictionForUncommonPathsPPC64, false, \
|
||||
"Use static branch prediction hints for uncommon paths.") \
|
||||
\
|
||||
product(bool, UsePower6SchedulerPPC64, false, \
|
||||
"Use Power6 Scheduler.") \
|
||||
\
|
||||
product(bool, InsertEndGroupPPC64, false, \
|
||||
"Insert EndGroup instructions to optimize for Power6.") \
|
||||
\
|
||||
/* Trap based checks. */ \
|
||||
/* Trap based checks use the ppc trap instructions to check certain */ \
|
||||
@ -108,5 +123,4 @@ define_pd_global(uintx, TypeProfileLevel, 0);
|
||||
" Use this to ease debugging.") \
|
||||
|
||||
|
||||
|
||||
#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
|
||||
|
@ -28,17 +28,17 @@
|
||||
#include "runtime/icache.hpp"
|
||||
|
||||
// Use inline assembler to implement icache flush.
|
||||
int ppc64_flush_icache(address start, int lines, int magic){
|
||||
int ICache::ppc64_flush_icache(address start, int lines, int magic) {
|
||||
address end = start + (unsigned int)lines*ICache::line_size;
|
||||
assert(start <= end, "flush_icache parms");
|
||||
|
||||
// store modified cache lines from data cache
|
||||
for (address a=start; a<end; a+=ICache::line_size) {
|
||||
for (address a = start; a < end; a += ICache::line_size) {
|
||||
__asm__ __volatile__(
|
||||
"dcbst 0, %0 \n"
|
||||
:
|
||||
: "r" (a)
|
||||
: "memory");
|
||||
"dcbst 0, %0 \n"
|
||||
:
|
||||
: "r" (a)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
// sync instruction
|
||||
@ -49,20 +49,20 @@ int ppc64_flush_icache(address start, int lines, int magic){
|
||||
: "memory");
|
||||
|
||||
// invalidate respective cache lines in instruction cache
|
||||
for (address a=start; a<end; a+=ICache::line_size) {
|
||||
for (address a = start; a < end; a += ICache::line_size) {
|
||||
__asm__ __volatile__(
|
||||
"icbi 0, %0 \n"
|
||||
:
|
||||
: "r" (a)
|
||||
: "memory");
|
||||
"icbi 0, %0 \n"
|
||||
:
|
||||
: "r" (a)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
// discard fetched instructions
|
||||
__asm__ __volatile__(
|
||||
"isync \n"
|
||||
:
|
||||
:
|
||||
: "memory");
|
||||
"isync \n"
|
||||
:
|
||||
:
|
||||
: "memory");
|
||||
|
||||
return magic;
|
||||
}
|
||||
@ -70,7 +70,7 @@ int ppc64_flush_icache(address start, int lines, int magic){
|
||||
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
|
||||
StubCodeMark mark(this, "ICache", "flush_icache_stub");
|
||||
|
||||
*flush_icache_stub = (ICache::flush_icache_stub_t)ppc64_flush_icache;
|
||||
*flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
|
||||
|
||||
// First call to flush itself
|
||||
ICache::invalidate_range((address)(*flush_icache_stub), 0);
|
||||
|
@ -30,15 +30,23 @@
|
||||
// code, part of the processor instruction cache potentially has to be flushed.
|
||||
|
||||
class ICache : public AbstractICache {
|
||||
friend class ICacheStubGenerator;
|
||||
static int ppc64_flush_icache(address start, int lines, int magic);
|
||||
|
||||
public:
|
||||
enum {
|
||||
// On PowerPC the cache line size is 32 bytes.
|
||||
stub_size = 160, // Size of the icache flush stub in bytes.
|
||||
line_size = 32, // Flush instruction affects 32 bytes.
|
||||
log2_line_size = 5 // log2(line_size)
|
||||
// Actually, cache line size is 64, but keeping it as it is to be
|
||||
// on the safe side on ALL PPC64 implementations.
|
||||
log2_line_size = 5,
|
||||
line_size = 1 << log2_line_size
|
||||
};
|
||||
|
||||
// Use default implementation
|
||||
static void ppc64_flush_icache_bytes(address start, int bytes) {
|
||||
// Align start address to an icache line boundary and transform
|
||||
// nbytes to an icache line count.
|
||||
const uint line_offset = mask_address_bits(start, line_size - 1);
|
||||
ppc64_flush_icache(start - line_offset, (bytes + line_offset + line_size - 1) >> log2_line_size, 0);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CPU_PPC_VM_ICACHE_PPC_HPP
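The byte-oriented wrapper ppc64_flush_icache_bytes() above aligns the start address down to a line boundary and rounds the byte count up to whole lines before calling the line-oriented flush. A standalone sketch of just that arithmetic, with the 32-byte line size taken from the enum above and mask_address_bits paraphrased as a plain mask (function and variable names are mine):

#include <cassert>
#include <cstdint>

enum { log2_line_size = 5, line_size = 1 << log2_line_size };  // 32-byte lines, as above

// Compute the aligned start and the number of cache lines covering [start, start + bytes).
static void flush_range(uintptr_t start, int bytes, uintptr_t* aligned_start, int* lines) {
  uintptr_t line_offset = start & (line_size - 1);      // mask_address_bits(start, line_size-1)
  *aligned_start = start - line_offset;                 // align down to a line boundary
  *lines = (bytes + (int)line_offset + line_size - 1) >> log2_line_size;  // round up
}

int main() {
  uintptr_t s; int n;
  flush_range(0x1000, 64, &s, &n);  assert(s == 0x1000 && n == 2);  // already aligned, two lines
  flush_range(0x101c,  4, &s, &n);  assert(s == 0x1000 && n == 1);  // stays within one line
  flush_range(0x101c,  8, &s, &n);  assert(s == 0x1000 && n == 2);  // crosses a line boundary
  return 0;
}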
|
||||
|
@ -30,13 +30,21 @@
|
||||
#include "interp_masm_ppc_64.hpp"
|
||||
#include "interpreter/interpreterRuntime.hpp"
|
||||
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) // nothing
|
||||
#else
|
||||
#define BLOCK_COMMENT(str) block_comment(str)
|
||||
#endif
|
||||
|
||||
void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Register temp_reg) {
|
||||
#ifdef CC_INTERP
|
||||
address exception_entry = StubRoutines::throw_NullPointerException_at_call_entry();
|
||||
#else
|
||||
address exception_entry = Interpreter::throw_NullPointerException_entry();
|
||||
#endif
|
||||
MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
|
||||
}
|
||||
|
||||
// Lock object
|
||||
//
|
||||
// Registers alive
|
||||
@ -47,7 +55,7 @@
|
||||
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
if (UseHeavyMonitors) {
|
||||
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
|
||||
monitor, /*check_for_exceptions=*/false);
|
||||
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
|
||||
} else {
|
||||
// template code:
|
||||
//
|
||||
@ -69,7 +77,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
const Register tmp = R10_ARG8;
|
||||
|
||||
Label done;
|
||||
Label slow_case;
|
||||
Label cas_failed, slow_case;
|
||||
|
||||
assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
|
||||
|
||||
@ -91,7 +99,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
|
||||
// Initialize the box (Must happen before we update the object mark!).
|
||||
std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
|
||||
BasicLock::displaced_header_offset_in_bytes(), monitor);
|
||||
BasicLock::displaced_header_offset_in_bytes(), monitor);
|
||||
|
||||
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
|
||||
|
||||
@ -106,12 +114,14 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
/*compare_value=*/displaced_header, /*exchange_value=*/monitor,
|
||||
/*where=*/object_mark_addr,
|
||||
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
|
||||
MacroAssembler::cmpxchgx_hint_acquire_lock());
|
||||
MacroAssembler::cmpxchgx_hint_acquire_lock(),
|
||||
noreg,
|
||||
&cas_failed);
|
||||
|
||||
// If the compare-and-exchange succeeded, then we found an unlocked
|
||||
// object and we have now locked it.
|
||||
beq(CCR0, done);
|
||||
|
||||
b(done);
|
||||
bind(cas_failed);
|
||||
|
||||
// } else if (THREAD->is_lock_owned((address)displaced_header))
|
||||
// // Simple recursive case.
|
||||
@ -134,7 +144,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
bne(CCR0, slow_case);
|
||||
release();
|
||||
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
|
||||
BasicLock::displaced_header_offset_in_bytes(), monitor);
|
||||
BasicLock::displaced_header_offset_in_bytes(), monitor);
|
||||
b(done);
|
||||
|
||||
|
||||
@ -146,7 +156,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
// slow case of monitor enter.
|
||||
bind(slow_case);
|
||||
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
|
||||
monitor, /*check_for_exceptions=*/false);
|
||||
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
|
||||
// }
|
||||
|
||||
bind(done);
|
||||
@ -160,7 +170,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
|
||||
// which must be initialized with the object to lock.
|
||||
//
|
||||
// Throw IllegalMonitorException if object is not locked by current thread.
|
||||
void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
|
||||
if (UseHeavyMonitors) {
|
||||
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
|
||||
monitor, /*check_for_exceptions=*/false);
|
||||
@ -184,9 +194,8 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
const Register object_mark_addr = R9_ARG7;
|
||||
const Register current_header = R10_ARG8;
|
||||
|
||||
Label no_recursive_unlock;
|
||||
Label free_slot;
|
||||
Label slow_case;
|
||||
Label done;
|
||||
|
||||
assert_different_registers(object, displaced_header, object_mark_addr, current_header);
|
||||
|
||||
@ -194,7 +203,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
// The object address from the monitor is in object.
|
||||
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
|
||||
biased_locking_exit(CCR0, object, displaced_header, done);
|
||||
biased_locking_exit(CCR0, object, displaced_header, free_slot);
|
||||
}
|
||||
|
||||
// Test first if we are in the fast recursive case.
|
||||
@ -203,13 +212,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
|
||||
// If the displaced header is zero, we have a recursive unlock.
|
||||
cmpdi(CCR0, displaced_header, 0);
|
||||
bne(CCR0, no_recursive_unlock);
|
||||
// Release in recursive unlock is not necessary.
|
||||
// release();
|
||||
std(displaced_header/*==0!*/, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
b(done);
|
||||
|
||||
bind(no_recursive_unlock);
|
||||
beq(CCR0, free_slot); // recursive unlock
|
||||
|
||||
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
|
||||
// // We swapped the unlocked mark in displaced_header into the object's mark word.
|
||||
@ -218,7 +221,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
// If we still have a lightweight lock, unlock the object and be done.
|
||||
|
||||
// The object address from the monitor is in object.
|
||||
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
if (!UseBiasedLocking) ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
|
||||
|
||||
// We have the displaced header in displaced_header. If the lock is still
|
||||
@ -229,17 +232,11 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
/*current_value=*/current_header,
|
||||
/*compare_value=*/monitor, /*exchange_value=*/displaced_header,
|
||||
/*where=*/object_mark_addr,
|
||||
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
|
||||
MacroAssembler::cmpxchgx_hint_release_lock());
|
||||
bne(CCR0, slow_case);
|
||||
|
||||
// Exchange worked, do monitor->set_obj(NULL).
|
||||
li(R0, 0);
|
||||
// Must release earlier (see cmpxchgd above).
|
||||
// release();
|
||||
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
b(done);
|
||||
|
||||
MacroAssembler::MemBarRel,
|
||||
MacroAssembler::cmpxchgx_hint_release_lock(),
|
||||
noreg,
|
||||
&slow_case);
|
||||
b(free_slot);
|
||||
|
||||
// } else {
|
||||
// // Slow path.
|
||||
@ -249,9 +246,17 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
|
||||
// we need to get into the slow case.
|
||||
bind(slow_case);
|
||||
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
|
||||
monitor, /*check_for_exceptions=*/false);
|
||||
monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
|
||||
// }
|
||||
|
||||
Label done;
|
||||
b(done); // Monitor register may be overwritten! Runtime has already freed the slot.
|
||||
|
||||
// Exchange worked, do monitor->set_obj(NULL);
|
||||
align(32, 12);
|
||||
bind(free_slot);
|
||||
li(R0, 0);
|
||||
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
|
||||
bind(done);
|
||||
}
|
||||
}
|
||||
@ -375,6 +380,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
|
||||
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
|
||||
/*check_exceptions=*/false);
|
||||
|
||||
align(32, 12);
|
||||
bind(jvmti_post_done);
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
|
||||
public:
|
||||
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
|
||||
|
||||
void null_check_throw(Register a, int offset, Register temp_reg);
|
||||
|
||||
// Handy address generation macros
|
||||
#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
|
||||
#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method
|
||||
@ -51,15 +53,16 @@ class InterpreterMacroAssembler: public MacroAssembler {
|
||||
|
||||
// Object locking
|
||||
void lock_object (Register lock_reg, Register obj_reg);
|
||||
void unlock_object(Register lock_reg);
|
||||
void unlock_object(Register lock_reg, bool check_for_exceptions = true);
|
||||
|
||||
// Debugging
|
||||
void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos
|
||||
|
||||
// support for jvmdi/jvmpi
|
||||
void notify_method_entry();
|
||||
void notify_method_exit(bool save_result, TosState state);
|
||||
void notify_method_exit(bool is_native_method, TosState state);
|
||||
|
||||
#ifdef CC_INTERP
|
||||
// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
|
||||
// (using parent_frame_resize) and push a new interpreter
|
||||
// TOP_IJAVA_FRAME (using frame_size).
|
||||
@ -84,6 +87,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
|
||||
void pop_interpreter_state(bool prev_state_may_be_0);
|
||||
|
||||
void restore_prev_state();
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
|
||||
|
@ -396,18 +396,14 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type
|
||||
//
|
||||
|
||||
Label done;
|
||||
Label is_false;
|
||||
|
||||
address entry = __ pc();
|
||||
|
||||
switch (type) {
|
||||
case T_BOOLEAN:
|
||||
__ cmpwi(CCR0, R3_RET, 0);
|
||||
__ beq(CCR0, is_false);
|
||||
__ li(R3_RET, 1);
|
||||
__ b(done);
|
||||
__ bind(is_false);
|
||||
__ li(R3_RET, 0);
|
||||
// convert !=0 to 1
|
||||
__ neg(R0, R3_RET);
|
||||
__ orr(R0, R3_RET, R0);
|
||||
__ srwi(R3_RET, R0, 31);
|
||||
break;
|
||||
case T_BYTE:
|
||||
// sign extend 8 bits
|
||||
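The branch-free boolean normalization introduced in this hunk (neg, or, srwi by 31) relies on a classic identity: for a 32-bit value x, the sign bit of (x | -x) is set exactly when x != 0. A quick standalone check of the identity itself (this is only the arithmetic, not the generated assembly):

#include <cassert>
#include <cstdint>

// Branch-free "x != 0 ? 1 : 0" for 32-bit values: sign bit of (x | -x), shifted down.
static uint32_t normalize_bool(uint32_t x) {
  return (x | (0u - x)) >> 31;
}

int main() {
  assert(normalize_bool(0) == 0);
  assert(normalize_bool(1) == 1);
  assert(normalize_bool(0x80000000u) == 1);   // only the sign bit set
  assert(normalize_bool(0xFFFFFFFFu) == 1);
  return 0;
}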
@ -478,7 +474,7 @@ address InterpreterGenerator::generate_abstract_entry(void) {
|
||||
|
||||
// Push a new C frame and save LR.
|
||||
__ save_LR_CR(R0);
|
||||
__ push_frame_abi112_nonvolatiles(0, R11_scratch1);
|
||||
__ push_frame_abi112(0, R11_scratch1);
|
||||
|
||||
// This is not a leaf but we have a JavaFrameAnchor now and we will
|
||||
// check (create) exceptions afterward so this is ok.
|
||||
@ -491,8 +487,12 @@ address InterpreterGenerator::generate_abstract_entry(void) {
|
||||
// Reset JavaFrameAnchor from call_VM_leaf above.
|
||||
__ reset_last_Java_frame();
|
||||
|
||||
#ifdef CC_INTERP
|
||||
// Return to frame manager, it will handle the pending exception.
|
||||
__ blr();
|
||||
#else
|
||||
Unimplemented();
|
||||
#endif
|
||||
|
||||
return entry;
|
||||
}
|
||||
@ -503,16 +503,20 @@ address InterpreterGenerator::generate_accessor_entry(void) {
|
||||
if(!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods)))
|
||||
return NULL;
|
||||
|
||||
Label Ldone, Lslow_path;
|
||||
Label Lslow_path, Lacquire;
|
||||
|
||||
const Register Rthis = R3_ARG1,
|
||||
const Register
|
||||
Rclass_or_obj = R3_ARG1,
|
||||
Rconst_method = R4_ARG2,
|
||||
Rcodes = Rconst_method,
|
||||
Rcpool_cache = R5_ARG3,
|
||||
Rscratch = R11_scratch1,
|
||||
Rjvmti_mode = Rscratch,
|
||||
Roffset = R12_scratch2,
|
||||
Rflags = R6_ARG4;
|
||||
Rflags = R6_ARG4,
|
||||
Rbtable = R7_ARG5;
|
||||
|
||||
static address branch_table[number_of_states];
|
||||
|
||||
address entry = __ pc();
|
||||
|
||||
@ -521,13 +525,9 @@ address InterpreterGenerator::generate_accessor_entry(void) {
|
||||
|
||||
// Also check for JVMTI mode
|
||||
// Check for null obj, take slow path if so.
|
||||
#ifdef CC_INTERP
|
||||
__ ld(Rthis, Interpreter::stackElementSize, R17_tos);
|
||||
#else
|
||||
Unimplemented()
|
||||
#endif
|
||||
__ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
|
||||
__ lwz(Rjvmti_mode, thread_(interp_only_mode));
|
||||
__ cmpdi(CCR1, Rthis, 0);
|
||||
__ cmpdi(CCR1, Rclass_or_obj, 0);
|
||||
__ cmpwi(CCR0, Rjvmti_mode, 0);
|
||||
__ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
|
||||
__ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0
|
||||
@ -560,58 +560,127 @@ address InterpreterGenerator::generate_accessor_entry(void) {
|
||||
__ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
|
||||
__ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);
|
||||
|
||||
// Get field type.
|
||||
// (Rflags>>ConstantPoolCacheEntry::tos_state_shift)&((1<<ConstantPoolCacheEntry::tos_state_bits)-1)
|
||||
// Following code is from templateTable::getfield_or_static
|
||||
// Load pointer to branch table
|
||||
__ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
|
||||
|
||||
// Get volatile flag
|
||||
__ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
|
||||
// note: sync is needed before volatile load on PPC64
|
||||
|
||||
// Check field type
|
||||
__ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
|
||||
|
||||
#ifdef ASSERT
|
||||
__ ld(R9_ARG7, 0, R1_SP);
|
||||
__ ld(R10_ARG8, 0, R21_sender_SP);
|
||||
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
|
||||
__ asm_assert_eq("backlink", 0x543);
|
||||
Label LFlagInvalid;
|
||||
__ cmpldi(CCR0, Rflags, number_of_states);
|
||||
__ bge(CCR0, LFlagInvalid);
|
||||
|
||||
__ ld(R9_ARG7, 0, R1_SP);
|
||||
__ ld(R10_ARG8, 0, R21_sender_SP);
|
||||
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
|
||||
__ asm_assert_eq("backlink", 0x543);
|
||||
#endif // ASSERT
|
||||
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
|
||||
|
||||
// Load the return value according to field type.
|
||||
Label Litos, Lltos, Lbtos, Lctos, Lstos;
|
||||
__ cmpdi(CCR1, Rflags, itos);
|
||||
__ cmpdi(CCR0, Rflags, ltos);
|
||||
__ beq(CCR1, Litos);
|
||||
__ beq(CCR0, Lltos);
|
||||
__ cmpdi(CCR1, Rflags, btos);
|
||||
__ cmpdi(CCR0, Rflags, ctos);
|
||||
__ beq(CCR1, Lbtos);
|
||||
__ beq(CCR0, Lctos);
|
||||
__ cmpdi(CCR1, Rflags, stos);
|
||||
__ beq(CCR1, Lstos);
|
||||
// Load from branch table and dispatch (volatile case: one instruction ahead)
|
||||
__ sldi(Rflags, Rflags, LogBytesPerWord);
|
||||
__ cmpwi(CCR6, Rscratch, 1); // volatile?
|
||||
__ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
|
||||
__ ldx(Rbtable, Rbtable, Rflags);
|
||||
|
||||
__ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
|
||||
__ mtctr(Rbtable);
|
||||
__ bctr();
|
||||
|
||||
#ifdef ASSERT
|
||||
__ cmpdi(CCR0, Rflags, atos);
|
||||
__ asm_assert_eq("what type is this?", 0x432);
|
||||
__ bind(LFlagInvalid);
|
||||
__ stop("got invalid flag", 0x6541);
|
||||
|
||||
bool all_uninitialized = true,
|
||||
all_initialized = true;
|
||||
for (int i = 0; i<number_of_states; ++i) {
|
||||
all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
|
||||
all_initialized = all_initialized && (branch_table[i] != NULL);
|
||||
}
|
||||
assert(all_uninitialized != all_initialized, "consistency"); // either or
|
||||
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
|
||||
if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
|
||||
if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
|
||||
__ stop("unexpected type", 0x6551);
|
||||
#endif
|
||||
// fallthru: __ bind(Latos);
|
||||
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rthis);
|
||||
|
||||
if (branch_table[itos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[itos] = __ pc(); // non-volatile_entry point
|
||||
__ lwax(R3_RET, Rclass_or_obj, Roffset);
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
if (branch_table[ltos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[ltos] = __ pc(); // non-volatile_entry point
|
||||
__ ldx(R3_RET, Rclass_or_obj, Roffset);
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
if (branch_table[btos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[btos] = __ pc(); // non-volatile_entry point
|
||||
__ lbzx(R3_RET, Rclass_or_obj, Roffset);
|
||||
__ extsb(R3_RET, R3_RET);
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
if (branch_table[ctos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[ctos] = __ pc(); // non-volatile_entry point
|
||||
__ lhzx(R3_RET, Rclass_or_obj, Roffset);
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
if (branch_table[stos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[stos] = __ pc(); // non-volatile_entry point
|
||||
__ lhax(R3_RET, Rclass_or_obj, Roffset);
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
if (branch_table[atos] == 0) { // generate only once
|
||||
__ align(32, 28, 28); // align load
|
||||
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
|
||||
branch_table[atos] = __ pc(); // non-volatile_entry point
|
||||
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
|
||||
__ verify_oop(R3_RET);
|
||||
//__ dcbt(R3_RET); // prefetch
|
||||
__ beq(CCR6, Lacquire);
|
||||
__ blr();
|
||||
}
|
||||
|
||||
__ align(32, 12);
|
||||
__ bind(Lacquire);
|
||||
__ twi_0(R3_RET);
|
||||
__ isync(); // acquire
|
||||
__ blr();
|
||||
|
||||
__ bind(Litos);
|
||||
__ lwax(R3_RET, Rthis, Roffset);
|
||||
__ blr();
|
||||
|
||||
__ bind(Lltos);
|
||||
__ ldx(R3_RET, Rthis, Roffset);
|
||||
__ blr();
|
||||
|
||||
__ bind(Lbtos);
|
||||
__ lbzx(R3_RET, Rthis, Roffset);
|
||||
__ extsb(R3_RET, R3_RET);
|
||||
__ blr();
|
||||
|
||||
__ bind(Lctos);
|
||||
__ lhzx(R3_RET, Rthis, Roffset);
|
||||
__ blr();
|
||||
|
||||
__ bind(Lstos);
|
||||
__ lhax(R3_RET, Rthis, Roffset);
|
||||
__ blr();
|
||||
#ifdef ASSERT
|
||||
for (int i = 0; i<number_of_states; ++i) {
|
||||
assert(branch_table[i], "accessor_entry initialization");
|
||||
//tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
|
||||
}
|
||||
#endif
|
||||
|
||||
__ bind(Lslow_path);
|
||||
assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now");
|
||||
@ -670,18 +739,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
|
||||
// continue and the thread will safepoint at the next bytecode dispatch.
|
||||
|
||||
// If the receiver is null then it is OK to jump to the slow path.
|
||||
#ifdef CC_INTERP
|
||||
__ ld(R3_RET, Interpreter::stackElementSize, R17_tos); // get receiver
|
||||
#else
|
||||
Unimplemented();
|
||||
#endif
|
||||
__ ld(R3_RET, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp)); // get receiver
|
||||
|
||||
// Check if receiver == NULL and go the slow path.
|
||||
__ cmpdi(CCR0, R3_RET, 0);
|
||||
__ beq(CCR0, slow_path);
|
||||
|
||||
// Load the value of the referent field.
|
||||
__ load_heap_oop_not_null(R3_RET, referent_offset, R3_RET);
|
||||
__ load_heap_oop(R3_RET, referent_offset, R3_RET);
|
||||
|
||||
// Generate the G1 pre-barrier code to log the value of
|
||||
// the referent field in an SATB buffer. Note with
|
||||
|
@ -40,8 +40,10 @@
|
||||
#define JNIIMPORT
|
||||
#endif
|
||||
|
||||
#define JNICALL
|
||||
typedef int jint;
|
||||
#define JNICALL
|
||||
|
||||
typedef int jint;
|
||||
|
||||
#if defined(_LP64)
|
||||
typedef long jlong;
|
||||
#else
|
||||
|
@ -97,8 +97,10 @@ void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Re
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::align(int modulus) {
|
||||
while (offset() % modulus != 0) nop();
|
||||
void MacroAssembler::align(int modulus, int max, int rem) {
|
||||
int padding = (rem + modulus - (offset() % modulus)) % modulus;
|
||||
if (padding > max) return;
|
||||
for (int c = (padding >> 2); c > 0; --c) { nop(); }
|
||||
}
|
||||
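The new three-argument align(modulus, max, rem) pads the current offset so that offset % modulus == rem, but only when that costs at most max bytes; otherwise it emits nothing. The interpreter entries above call align(32, 28, 28), which appears to place the sync emitted right after it just in front of a 32-byte boundary so the non-volatile entry point itself lands on the boundary. A standalone sketch of only the padding arithmetic (helper name mine):

#include <cassert>

// Number of padding bytes align(modulus, max, rem) would emit at 'offset':
// pad until (offset + padding) % modulus == rem, unless that needs more than max bytes.
static int align_padding(int offset, int modulus, int max, int rem) {
  int padding = (rem + modulus - (offset % modulus)) % modulus;
  return (padding > max) ? 0 : padding;
}

int main() {
  assert(align_padding( 0, 32, 28, 28) == 28);
  assert(align_padding(28, 32, 28, 28) == 0);    // already at the requested remainder
  assert(align_padding(32, 32, 28, 28) == 28);
  assert(align_padding( 4, 32, 28, 28) == 24);
  // The old two-argument align(modulus) corresponds to rem == 0 with an unlimited max.
  assert(align_padding( 5,  8,  8,  0) == 3);
  return 0;
}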
|
||||
// Issue instructions that calculate given TOC from global TOC.
|
||||
@ -186,16 +188,25 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
|
||||
|
||||
#ifdef _LP64
|
||||
// Patch compressed oops or klass constants.
|
||||
// Assembler sequence is
|
||||
// 1) compressed oops:
|
||||
// lis rx = const.hi
|
||||
// ori rx = rx | const.lo
|
||||
// 2) compressed klass:
|
||||
// lis rx = const.hi
|
||||
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
|
||||
// ori rx = rx | const.lo
|
||||
// Clrldi will be passed by.
|
||||
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
|
||||
assert(UseCompressedOops, "Should only patch compressed oops");
|
||||
|
||||
const address inst2_addr = a;
|
||||
const int inst2 = *(int *)inst2_addr;
|
||||
|
||||
// The relocation points to the second instruction, the addi,
|
||||
// and the addi reads and writes the same register dst.
|
||||
const int dst = inv_rt_field(inst2);
|
||||
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
|
||||
// The relocation points to the second instruction, the ori,
|
||||
// and the ori reads and writes the same register dst.
|
||||
const int dst = inv_rta_field(inst2);
|
||||
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
|
||||
// Now, find the preceding addis which writes to dst.
|
||||
int inst1 = 0;
|
||||
address inst1_addr = inst2_addr - BytesPerInstWord;
|
||||
@ -210,8 +221,9 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
|
||||
int xc = (data >> 16) & 0xffff;
|
||||
int xd = (data >> 0) & 0xffff;
|
||||
|
||||
set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
|
||||
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
|
||||
set_imm((int *)inst2_addr, (short)(xd));
|
||||
|
||||
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
|
||||
}
|
||||
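The switch from addi to ori as the low-half instruction is what allows the simpler patching above: addi sign-extends its 16-bit immediate, so the old scheme had to pre-increment the high half whenever bit 15 of the value was set, while ori just bitwise-ORs the halves. A standalone sketch contrasting the two splits (helper names mine, value arbitrary):

#include <cassert>
#include <cstdint>

// "lis; ori" split: ori ORs in the low 16 bits, so this is a plain bitwise split.
static void split_for_ori(uint32_t v, uint16_t* hi, uint16_t* lo) {
  *hi = (uint16_t)(v >> 16);
  *lo = (uint16_t)(v & 0xffff);
}

// Old "lis; addi" split: addi sign-extends its immediate, so when bit 15 of the value
// is set the high half must be pre-incremented (the xc + ((xd & 0x8000) != 0 ? 1 : 0) term).
static void split_for_addi(uint32_t v, uint16_t* hi, uint16_t* lo) {
  *lo = (uint16_t)(v & 0xffff);
  *hi = (uint16_t)((v >> 16) + (((v & 0x8000) != 0) ? 1 : 0));
}

int main() {
  uint32_t v = 0x1234ABCDu;  // bit 15 of the low half is set, the interesting case
  uint16_t hi, lo;

  split_for_ori(v, &hi, &lo);
  assert((((uint32_t)hi << 16) | lo) == v);                    // lis + ori reassembly

  split_for_addi(v, &hi, &lo);
  assert((uint32_t)(((uint32_t)hi << 16) + (int16_t)lo) == v); // lis + addi reassembly
  return 0;
}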
|
||||
@ -222,10 +234,10 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
|
||||
const address inst2_addr = a;
|
||||
const int inst2 = *(int *)inst2_addr;
|
||||
|
||||
// The relocation points to the second instruction, the addi,
|
||||
// and the addi reads and writes the same register dst.
|
||||
const int dst = inv_rt_field(inst2);
|
||||
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
|
||||
// The relocation points to the second instruction, the ori,
|
||||
// and the ori reads and writes the same register dst.
|
||||
const int dst = inv_rta_field(inst2);
|
||||
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
|
||||
// Now, find the preceding lis which writes to dst.
|
||||
int inst1 = 0;
|
||||
address inst1_addr = inst2_addr - BytesPerInstWord;
|
||||
@ -238,8 +250,9 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
|
||||
}
|
||||
assert(inst1_found, "inst is not lis");
|
||||
|
||||
uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
|
||||
uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
|
||||
uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
|
||||
uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
|
||||
|
||||
return (int) (xl | xh);
|
||||
}
|
||||
#endif // _LP64
|
||||
@ -252,13 +265,10 @@ void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
|
||||
// FIXME: We should insert relocation information for oops at the constant
|
||||
// pool entries instead of inserting it at the loads; patching of a constant
|
||||
// pool entry should be less expensive.
|
||||
Unimplemented();
|
||||
if (false) {
|
||||
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
|
||||
// Relocate at the pc of the load.
|
||||
relocate(a.rspec());
|
||||
toc_offset = (int)(oop_address - code()->consts()->start());
|
||||
}
|
||||
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
|
||||
// Relocate at the pc of the load.
|
||||
relocate(a.rspec());
|
||||
toc_offset = (int)(oop_address - code()->consts()->start());
|
||||
ld_largeoffset_unchecked(dst, toc_offset, toc, true);
|
||||
}
|
||||
|
||||
@ -532,7 +542,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
|
||||
masm.b(dest);
|
||||
}
|
||||
}
|
||||
ICache::invalidate_range(instruction_addr, code_size);
|
||||
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
|
||||
}
|
||||
|
||||
// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
|
||||
@ -673,7 +683,7 @@ void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, ad
|
||||
CodeBuffer buf(instruction_addr, code_size);
|
||||
MacroAssembler masm(&buf);
|
||||
masm.bxx64_patchable(dest, relocInfo::none, link);
|
||||
ICache::invalidate_range(instruction_addr, code_size);
|
||||
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
|
||||
}
|
||||
|
||||
// Get dest address of a bxx64_patchable instruction.
|
||||
@ -964,6 +974,14 @@ address MacroAssembler::call_c(Register fd) {
|
||||
/*load env=*/true);
|
||||
}
|
||||
|
||||
address MacroAssembler::call_c_and_return_to_caller(Register fd) {
|
||||
return branch_to(fd, /*and_link=*/false,
|
||||
/*save toc=*/false,
|
||||
/*restore toc=*/false,
|
||||
/*load toc=*/true,
|
||||
/*load env=*/true);
|
||||
}
|
||||
|
||||
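call_c_and_return_to_caller() enables tail calls from stubs: it branches through the function descriptor without setting LR, so the C callee's blr returns straight to the stub's caller. A hedged sketch of the intended use, modeled on the write-ref-array post barrier further down (assumes the usual '#define __ _masm->' convention; the wrapper name is hypothetical):

// Hand two arguments to a C runtime helper and let it return directly to our
// caller; the stub itself needs no frame push and no LR save.
static void emit_tail_call_to_runtime(MacroAssembler* _masm, address entry_point,
                                      Register arg1, Register arg2) {
  __ mr_if_needed(R3_ARG1, arg1);       // first C argument
  __ mr_if_needed(R4_ARG2, arg2);       // second C argument
  __ load_const(R11, entry_point, R0);  // address of the function descriptor
  __ call_c_and_return_to_caller(R11);  // branch only; callee's blr goes to our caller
}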
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
|
||||
if (rt != relocInfo::none) {
|
||||
// this call needs to be relocatable
|
||||
@ -2315,7 +2333,7 @@ void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Ja
|
||||
if (last_Java_pc != noreg)
|
||||
std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
|
||||
|
||||
// set last_Java_sp last
|
||||
// Set last_Java_sp last.
|
||||
std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
|
||||
}
|
||||
|
||||
@ -2454,6 +2472,57 @@ void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
|
||||
}
|
||||
}
|
||||
|
||||
// Clear Array
|
||||
// Kills both input registers. tmp == R0 is allowed.
|
||||
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
|
||||
// Procedure for large arrays (uses data cache block zero instruction).
|
||||
Label startloop, fast, fastloop, small_rest, restloop, done;
|
||||
const int cl_size = VM_Version::get_cache_line_size(),
|
||||
cl_dwords = cl_size>>3,
|
||||
cl_dw_addr_bits = exact_log2(cl_dwords),
|
||||
dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
|
||||
|
||||
//2:
|
||||
cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
|
||||
blt(CCR1, small_rest); // Too small.
|
||||
rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
|
||||
beq(CCR0, fast); // Already 128byte aligned.
|
||||
|
||||
subfic(tmp, tmp, cl_dwords);
|
||||
mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
|
||||
subf(cnt_dwords, tmp, cnt_dwords); // rest.
|
||||
li(tmp, 0);
|
||||
//10:
|
||||
bind(startloop); // Clear at the beginning to reach 128byte boundary.
|
||||
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
|
||||
addi(base_ptr, base_ptr, 8);
|
||||
bdnz(startloop);
|
||||
//13:
|
||||
bind(fast); // Clear 128byte blocks.
|
||||
srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
|
||||
andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
|
||||
mtctr(tmp); // Load counter.
|
||||
//16:
|
||||
bind(fastloop);
|
||||
dcbz(base_ptr); // Clear 128byte aligned block.
|
||||
addi(base_ptr, base_ptr, cl_size);
|
||||
bdnz(fastloop);
|
||||
if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
|
||||
//20:
|
||||
bind(small_rest);
|
||||
cmpdi(CCR0, cnt_dwords, 0); // size 0?
|
||||
beq(CCR0, done); // rest == 0
|
||||
li(tmp, 0);
|
||||
mtctr(cnt_dwords); // Load counter.
|
||||
//24:
|
||||
bind(restloop); // Clear rest.
|
||||
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
|
||||
addi(base_ptr, base_ptr, 8);
|
||||
bdnz(restloop);
|
||||
//27:
|
||||
bind(done);
|
||||
}
|
||||
|
||||
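A portable C++ model of the three phases above (leading dwords up to the next cache-line boundary, whole lines via dcbz, trailing dwords). This is an illustration of the control flow only, assuming the 128-byte Power cache line implied by the comments; the real code uses dcbz and CTR-driven loops:

#include <cstddef>
#include <cstdint>
#include <cstring>

static void clear_doublewords(uint64_t* base, size_t cnt_dwords, size_t cl_size = 128) {
  const size_t cl_dwords = cl_size / 8;
  if (cnt_dwords < 2 * cl_dwords - 1) {                     // small_rest: too small for dcbz
    for (size_t i = 0; i < cnt_dwords; i++) base[i] = 0;
    return;
  }
  size_t off  = ((uintptr_t)base >> 3) & (cl_dwords - 1);   // dword offset within the line
  size_t lead = off ? cl_dwords - off : 0;
  for (size_t i = 0; i < lead; i++) *base++ = 0;            // startloop: reach the boundary
  cnt_dwords -= lead;
  size_t lines = cnt_dwords / cl_dwords;                    // fastloop: one dcbz per line
  std::memset(base, 0, lines * cl_size);
  base += lines * cl_dwords;
  for (size_t i = 0; i < cnt_dwords % cl_dwords; i++)       // restloop: trailing dwords
    base[i] = 0;
}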
/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
|
||||
|
||||
// Search for a single jchar in an jchar[].
|
||||
@ -2926,12 +2995,11 @@ void MacroAssembler::verify_oop(Register oop, const char* msg) {
|
||||
if (!VerifyOops) {
|
||||
return;
|
||||
}
|
||||
// will be preserved.
|
||||
// Will be preserved.
|
||||
Register tmp = R11;
|
||||
assert(oop != tmp, "precondition");
|
||||
unsigned int nbytes_save = 10*8; // 10 volatile gprs
|
||||
address/* FunctionDescriptor** */fd =
|
||||
StubRoutines::verify_oop_subroutine_entry_address();
|
||||
address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
|
||||
// save tmp
|
||||
mr(R0, tmp);
|
||||
// kill tmp
|
||||
|
@ -58,9 +58,24 @@ class MacroAssembler: public Assembler {
|
||||
|
||||
// Move register if destination register and target register are different
|
||||
inline void mr_if_needed(Register rd, Register rs);
|
||||
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
|
||||
// This is dedicated to emitting scheduled mach nodes. It is placed here
// for better readability of the ad file.
// Endgroups are not needed if
// - the scheduler is off, or
// - the scheduler found that there is a natural group end; in that
//   case it reduced the size of the instruction used in the test
//   yielding 'needed'.
|
||||
inline void endgroup_if_needed(bool needed);
|
||||
|
||||
// Memory barriers.
|
||||
inline void membar(int bits);
|
||||
inline void release();
|
||||
inline void acquire();
|
||||
inline void fence();
|
||||
|
||||
// nop padding
|
||||
void align(int modulus);
|
||||
void align(int modulus, int max = 252, int rem = 0);
|
||||
|
||||
//
|
||||
// Constants, loading constants, TOC support
|
||||
@ -295,6 +310,8 @@ class MacroAssembler: public Assembler {
|
||||
// Call a C function via a function descriptor and use full C
|
||||
// calling conventions. Updates and returns _last_calls_return_pc.
|
||||
address call_c(Register function_descriptor);
|
||||
// For tail calls: only branch, don't link, so callee returns to caller of this function.
|
||||
address call_c_and_return_to_caller(Register function_descriptor);
|
||||
address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
|
||||
address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
|
||||
Register toc);
|
||||
@ -320,7 +337,7 @@ class MacroAssembler: public Assembler {
|
||||
// the entry point
|
||||
address entry_point,
|
||||
// flag which indicates if exception should be checked
|
||||
bool check_exception=true
|
||||
bool check_exception = true
|
||||
);
|
||||
|
||||
// Support for VM calls. This is the base routine called by the
|
||||
@ -530,9 +547,7 @@ class MacroAssembler: public Assembler {
|
||||
inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
|
||||
|
||||
// Check accessed object for null. Use SIGTRAP-based null checks on AIX.
|
||||
inline void ld_with_trap_null_check(Register d, int si16, Register s1);
|
||||
// Variant for heap OOPs including decompression of compressed OOPs.
|
||||
inline void load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant offs, Register s1);
|
||||
inline void load_with_trap_null_check(Register d, int si16, Register s1);
|
||||
|
||||
// Load heap oop and decompress. Loaded oop may not be null.
|
||||
inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg);
|
||||
@ -584,6 +599,8 @@ class MacroAssembler: public Assembler {
|
||||
is_trap_range_check_g(x) || is_trap_range_check_ge(x);
|
||||
}
|
||||
|
||||
void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
|
||||
|
||||
// Needle of length 1.
|
||||
void string_indexof_1(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar,
|
||||
@ -630,7 +647,7 @@ class MacroAssembler: public Assembler {
|
||||
|
||||
// TODO: verify method and klass metadata (compare against vptr?)
|
||||
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
|
||||
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
|
||||
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
|
||||
|
||||
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
|
||||
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
|
||||
|
@ -58,8 +58,25 @@ inline void MacroAssembler::round_to(Register r, int modulus) {
|
||||
|
||||
// Move register if destination register and target register are different.
|
||||
inline void MacroAssembler::mr_if_needed(Register rd, Register rs) {
|
||||
if(rs !=rd) mr(rd, rs);
|
||||
if (rs != rd) mr(rd, rs);
|
||||
}
|
||||
inline void MacroAssembler::fmr_if_needed(FloatRegister rd, FloatRegister rs) {
|
||||
if (rs != rd) fmr(rd, rs);
|
||||
}
|
||||
inline void MacroAssembler::endgroup_if_needed(bool needed) {
|
||||
if (needed) {
|
||||
endgroup();
|
||||
}
|
||||
}
|
||||
|
||||
inline void MacroAssembler::membar(int bits) {
|
||||
// TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
|
||||
// (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
|
||||
if (bits & StoreLoad) sync(); else lwsync();
|
||||
}
|
||||
inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
|
||||
inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
|
||||
inline void MacroAssembler::fence() { membar(LoadLoad | LoadStore | StoreLoad | StoreStore); }
|
||||
|
||||
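Under this mapping only a barrier set containing StoreLoad costs a full sync; release(), acquire() and the common release-store/load-acquire pairing all resolve to lwsync. A hedged sketch of such a pairing in stub code (assumes '#define __ _masm->'; function names are ours):

// Publish a value so a consumer that loads it with acquire semantics also
// sees every store that happened before the release.
static void emit_publish(MacroAssembler* _masm, Register val, Register box) {
  __ release();          // LoadStore | StoreStore  -> lwsync
  __ std(val, 0, box);   // publishing store
}

static void emit_consume(MacroAssembler* _masm, Register dst, Register box) {
  __ ld(dst, 0, box);    // load the published value
  __ acquire();          // LoadLoad | LoadStore    -> lwsync
}
// Of the three helpers only fence() emits a full sync, because its mask
// includes StoreLoad.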
// Address of the global TOC.
|
||||
inline address MacroAssembler::global_toc() {
|
||||
@ -117,13 +134,12 @@ inline bool MacroAssembler::is_calculate_address_from_global_toc_at(address a, a
|
||||
inline bool MacroAssembler::is_set_narrow_oop(address a, address bound) {
|
||||
const address inst2_addr = a;
|
||||
const int inst2 = *(int *)a;
|
||||
// The relocation points to the second instruction, the ori.
|
||||
if (!is_ori(inst2)) return false;
|
||||
|
||||
// The relocation points to the second instruction, the addi.
|
||||
if (!is_addi(inst2)) return false;
|
||||
|
||||
// The addi reads and writes the same register dst.
|
||||
const int dst = inv_rt_field(inst2);
|
||||
if (inv_ra_field(inst2) != dst) return false;
|
||||
// The ori reads and writes the same register dst.
|
||||
const int dst = inv_rta_field(inst2);
|
||||
if (inv_rs_field(inst2) != dst) return false;
|
||||
|
||||
// Now, find the preceding addis which writes to dst.
|
||||
int inst1 = 0;
|
||||
@ -266,9 +282,10 @@ inline void MacroAssembler::trap_ic_miss_check(Register a, Register b) {
|
||||
// Do an explicit null check if access to a+offset will not raise a SIGSEGV.
|
||||
// Either issue a trap instruction that raises SIGTRAP, or do a compare that
|
||||
// branches to exception_entry.
|
||||
// No support for compressed oops (base page of heap). Does not distinguish
|
||||
// No support for compressed oops (base page of heap). Does not distinguish
|
||||
// loads and stores.
|
||||
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg, address exception_entry) {
|
||||
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg,
|
||||
address exception_entry) {
|
||||
if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
|
||||
if (TrapBasedNullChecks) {
|
||||
assert(UseSIGTRAP, "sanity");
|
||||
@ -285,7 +302,7 @@ inline void MacroAssembler::null_check_throw(Register a, int offset, Register te
|
||||
}
|
||||
}
|
||||
|
||||
inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Register s1) {
|
||||
inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
|
||||
if (!os::zero_page_read_protected()) {
|
||||
if (TrapBasedNullChecks) {
|
||||
trap_null_check(s1);
|
||||
@ -294,17 +311,6 @@ inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Regist
|
||||
ld(d, si16, s1);
|
||||
}
|
||||
|
||||
// Attention: No null check for loaded uncompressed OOP. Can be used for loading klass field.
|
||||
inline void MacroAssembler::load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant si16,
|
||||
Register s1) {
|
||||
if ( !os::zero_page_read_protected()) {
|
||||
if (TrapBasedNullChecks) {
|
||||
trap_null_check(s1);
|
||||
}
|
||||
}
|
||||
load_heap_oop_not_null(d, si16, s1);
|
||||
}
|
||||
|
||||
inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1) {
|
||||
if (UseCompressedOops) {
|
||||
lwz(d, offs, s1);
|
||||
|
@ -31,12 +31,16 @@
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
#ifdef CC_INTERP
|
||||
#define EXCEPTION_ENTRY StubRoutines::throw_NullPointerException_at_call_entry()
|
||||
#else
|
||||
#define EXCEPTION_ENTRY Interpreter::throw_NullPointerException_entry()
|
||||
#endif
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) // nothing
|
||||
#define STOP(error) stop(error)
|
||||
#else
|
||||
#define BLOCK_COMMENT(str) __ block_comment(str)
|
||||
#define STOP(error) block_comment(error); __ stop(error)
|
||||
#endif
|
||||
|
||||
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
||||
@ -167,7 +171,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
|
||||
sizeof(u2), /*is_signed*/ false);
|
||||
// assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), "");
|
||||
Label L;
|
||||
__ ld(temp2, __ argument_offset(temp2, temp2, 0), R17_tos);
|
||||
__ ld(temp2, __ argument_offset(temp2, temp2, 0), CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
|
||||
__ cmpd(CCR1, temp2, recv);
|
||||
__ beq(CCR1, L);
|
||||
__ stop("receiver not on stack");
|
||||
@ -194,7 +198,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Register argbase = R17_tos; // parameter (preserved)
|
||||
Register argbase = CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp); // parameter (preserved)
|
||||
Register argslot = R3;
|
||||
Register temp1 = R6;
|
||||
Register param_size = R7;
|
||||
@ -271,7 +275,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
|
||||
Register member_reg,
|
||||
bool for_compiler_entry) {
|
||||
assert(is_signature_polymorphic(iid), "expected invoke iid");
|
||||
Register temp1 = (for_compiler_entry ? R21_tmp1 : R7);
|
||||
Register temp1 = (for_compiler_entry ? R25_tmp5 : R7);
|
||||
Register temp2 = (for_compiler_entry ? R22_tmp2 : R8);
|
||||
Register temp3 = (for_compiler_entry ? R23_tmp3 : R9);
|
||||
Register temp4 = (for_compiler_entry ? R24_tmp4 : R10);
|
||||
@ -295,11 +299,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
|
||||
__ verify_oop(receiver_reg);
|
||||
if (iid == vmIntrinsics::_linkToSpecial) {
|
||||
// Don't actually load the klass; just null-check the receiver.
|
||||
__ null_check_throw(receiver_reg, 0, temp1, StubRoutines::throw_NullPointerException_at_call_entry());
|
||||
__ null_check_throw(receiver_reg, -1, temp1, EXCEPTION_ENTRY);
|
||||
} else {
|
||||
// load receiver klass itself
|
||||
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(),
|
||||
temp1, StubRoutines::throw_NullPointerException_at_call_entry());
|
||||
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(), temp1, EXCEPTION_ENTRY);
|
||||
__ load_klass(temp1_recv_klass, receiver_reg);
|
||||
__ verify_klass_ptr(temp1_recv_klass);
|
||||
}
|
||||
@ -451,7 +454,7 @@ void trace_method_handle_stub(const char* adaptername,
|
||||
if (Verbose) {
|
||||
tty->print_cr("Registers:");
|
||||
const int abi_offset = frame::abi_112_size / 8;
|
||||
for (int i = R3->encoding(); i <= R13->encoding(); i++) {
|
||||
for (int i = R3->encoding(); i <= R12->encoding(); i++) {
|
||||
Register r = as_Register(i);
|
||||
int count = i - R3->encoding();
|
||||
// The registers are stored in reverse order on the stack (by save_volatile_gprs(R1_SP, abi_112_size)).
|
||||
@ -490,7 +493,7 @@ void trace_method_handle_stub(const char* adaptername,
|
||||
trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
|
||||
}
|
||||
|
||||
// safely create a frame and call frame::describe
|
||||
// Safely create a frame and call frame::describe.
|
||||
intptr_t *dump_sp = trace_calling_frame.sender_sp();
|
||||
|
||||
frame dump_frame = frame(dump_sp);
|
||||
@ -531,7 +534,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt
|
||||
__ mr(R6_ARG4, R1_SP);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
|
||||
|
||||
__ restore_volatile_gprs(R1_SP, 112); // except R0
|
||||
__ restore_volatile_gprs(R1_SP, 112); // Except R0.
|
||||
__ pop_frame();
|
||||
__ restore_LR_CR(R0);
|
||||
|
||||
|
@ -118,7 +118,7 @@ void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
|
||||
|
||||
a->bl(trampoline_stub_addr);
|
||||
}
|
||||
ICache::invalidate_range(addr_call, code_size);
|
||||
ICache::ppc64_flush_icache_bytes(addr_call, code_size);
|
||||
}
|
||||
|
||||
address NativeCall::get_trampoline() {
|
||||
@ -182,11 +182,13 @@ address NativeMovConstReg::next_instruction_address() const {
|
||||
|
||||
intptr_t NativeMovConstReg::data() const {
|
||||
address addr = addr_at(0);
|
||||
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
|
||||
|
||||
if (MacroAssembler::is_load_const_at(addr)) {
|
||||
return MacroAssembler::get_const(addr);
|
||||
} else if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
|
||||
}
|
||||
|
||||
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
|
||||
if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
|
||||
narrowOop no = (narrowOop)MacroAssembler::get_narrow_oop(addr, cb->content_begin());
|
||||
return cast_from_oop<intptr_t>(oopDesc::decode_heap_oop(no));
|
||||
} else {
|
||||
@ -213,19 +215,24 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
|
||||
} else if (cb != NULL &&
|
||||
MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) {
|
||||
// A calculation relative to the global TOC.
|
||||
const int invalidated_range =
|
||||
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
|
||||
(address)data);
|
||||
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
|
||||
// FIXME:
|
||||
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
|
||||
ICache::invalidate_range(start, range);
|
||||
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
|
||||
(address)data) {
|
||||
const int invalidated_range =
|
||||
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
|
||||
(address)data);
|
||||
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
|
||||
// FIXME:
|
||||
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
|
||||
ICache::ppc64_flush_icache_bytes(start, range);
|
||||
}
|
||||
next_address = addr + 1 * BytesPerInstWord;
|
||||
} else if (MacroAssembler::is_load_const_at(addr)) {
|
||||
// A normal 5 instruction load_const code sequence.
|
||||
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
|
||||
MacroAssembler::patch_const(addr, (long)data);
|
||||
ICache::invalidate_range(addr, load_const_instruction_size);
|
||||
if (MacroAssembler::get_const(addr) != (long)data) {
|
||||
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
|
||||
MacroAssembler::patch_const(addr, (long)data);
|
||||
ICache::ppc64_flush_icache_bytes(addr, load_const_instruction_size);
|
||||
}
|
||||
next_address = addr + 5 * BytesPerInstWord;
|
||||
} else if (MacroAssembler::is_bl(* (int*) addr)) {
|
||||
// A single branch-and-link instruction.
|
||||
@ -234,7 +241,7 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
|
||||
CodeBuffer cb(addr, code_size + 1);
|
||||
MacroAssembler* a = new MacroAssembler(&cb);
|
||||
a->bl((address) data);
|
||||
ICache::invalidate_range(addr, code_size);
|
||||
ICache::ppc64_flush_icache_bytes(addr, code_size);
|
||||
next_address = addr + code_size;
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
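The recurring change in set_data_plain() and set_narrow_oop() above is a read-compare-patch idiom: only rewrite the instruction stream and flush the icache when the encoded value actually differs. Condensed into a hypothetical helper for the 5-instruction load_const case, using only the calls shown above:

// Patch a 5 instruction load_const sequence only if its value changes, so
// unchanged call sites do not pay for an icache flush.
static void patch_load_const_if_needed(address addr, long data) {
  if (MacroAssembler::get_const(addr) != data) {
    // Not MT-safe; acceptable in contexts like CodeBuffer::copy_code().
    MacroAssembler::patch_const(addr, data);
    ICache::ppc64_flush_icache_bytes(addr, 5 * BytesPerInstWord);
  }
}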
@ -279,12 +286,13 @@ void NativeMovConstReg::set_data(intptr_t data) {
|
||||
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
|
||||
address addr = addr_at(0);
|
||||
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
|
||||
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
|
||||
const int invalidated_range =
|
||||
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
|
||||
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
|
||||
// FIXME:
|
||||
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
|
||||
ICache::invalidate_range(start, range);
|
||||
ICache::ppc64_flush_icache_bytes(start, range);
|
||||
}
|
||||
|
||||
// Do not use an assertion here. Let clients decide whether they only
|
||||
@ -292,15 +300,16 @@ void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL
|
||||
#ifdef ASSERT
|
||||
void NativeMovConstReg::verify() {
|
||||
address addr = addr_at(0);
|
||||
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
|
||||
if (! MacroAssembler::is_load_const_at(addr) &&
|
||||
! MacroAssembler::is_load_const_from_method_toc_at(addr) &&
|
||||
! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
|
||||
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
|
||||
! MacroAssembler::is_bl(*((int*) addr))) {
|
||||
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
|
||||
// TODO: PPC port Disassembler::decode(addr, 20, 20, tty);
|
||||
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
|
||||
! MacroAssembler::is_load_const_from_method_toc_at(addr)) {
|
||||
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
|
||||
if (! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
|
||||
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
|
||||
! MacroAssembler::is_bl(*((int*) addr))) {
|
||||
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
|
||||
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
|
||||
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // ASSERT
|
||||
@ -326,7 +335,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add
|
||||
a->illtrap();
|
||||
}
|
||||
}
|
||||
ICache::invalidate_range(verified_entry, code_size);
|
||||
ICache::ppc64_flush_icache_bytes(verified_entry, code_size);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
|
@ -132,7 +132,7 @@ inline NativeInstruction* nativeInstruction_at(address address) {
|
||||
class NativeCall: public NativeInstruction {
|
||||
public:
|
||||
|
||||
enum specific_constants {
|
||||
enum ppc_specific_constants {
|
||||
load_const_instruction_size = 28,
|
||||
load_const_from_method_toc_instruction_size = 16,
|
||||
instruction_size = 16 // Used in shared code for calls with reloc_info.
|
||||
@ -240,7 +240,7 @@ inline NativeFarCall* nativeFarCall_at(address instr) {
|
||||
class NativeMovConstReg: public NativeInstruction {
|
||||
public:
|
||||
|
||||
enum specific_constants {
|
||||
enum ppc_specific_constants {
|
||||
load_const_instruction_size = 20,
|
||||
load_const_from_method_toc_instruction_size = 8,
|
||||
instruction_size = 8 // Used in shared code for calls with reloc_info.
|
||||
@ -279,7 +279,7 @@ class NativeJump: public NativeInstruction {
|
||||
// We use MacroAssembler::b64_patchable() for implementing a
|
||||
// jump-anywhere instruction.
|
||||
|
||||
enum specific_constants {
|
||||
enum ppc_specific_constants {
|
||||
instruction_size = MacroAssembler::b64_patchable_size
|
||||
};
|
||||
|
||||
@ -384,7 +384,6 @@ class NativeCallTrampolineStub : public NativeInstruction {
|
||||
void set_destination(address new_destination);
|
||||
};
|
||||
|
||||
|
||||
inline bool is_NativeCallTrampolineStub_at(address address) {
|
||||
int first_instr = *(int*)address;
|
||||
return Assembler::is_addis(first_instr) &&
|
||||
|
12059 hotspot/src/cpu/ppc/vm/ppc.ad (new file; diff suppressed because it is too large)
24 hotspot/src/cpu/ppc/vm/ppc_64.ad (new file)
@ -0,0 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright 2012, 2013 SAP AG. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License version 2 only, as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
// version 2 for more details (a copy is included in the LICENSE file that
|
||||
// accompanied this code).
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License version
|
||||
// 2 along with this work; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
//
|
||||
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
// or visit www.oracle.com if you need additional information or have any
|
||||
// questions.
|
||||
//
|
||||
//
|
@ -30,8 +30,8 @@
|
||||
#include "asm/macroAssembler.hpp"
|
||||
#include "asm/register.hpp"
|
||||
#include "register_ppc.hpp"
|
||||
#ifdef TARGET_ARCH_MODEL_32
|
||||
# include "interp_masm_32.hpp"
|
||||
#ifdef TARGET_ARCH_MODEL_ppc_32
|
||||
# include "interp_masm_ppc_32.hpp"
|
||||
#endif
|
||||
#ifdef TARGET_ARCH_MODEL_ppc_64
|
||||
# include "interp_masm_ppc_64.hpp"
|
||||
|
@ -44,7 +44,7 @@ const char* RegisterImpl::name() const {
|
||||
|
||||
const char* ConditionRegisterImpl::name() const {
|
||||
const char* names[number_of_registers] = {
|
||||
"CR0", "CR1", "CR2", "CR3", "CCR4", "CCR5", "CCR6", "CCR7"
|
||||
"CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7"
|
||||
};
|
||||
return is_valid() ? names[encoding()] : "cnoreg";
|
||||
}
|
||||
@ -61,7 +61,7 @@ const char* FloatRegisterImpl::name() const {
|
||||
|
||||
const char* SpecialRegisterImpl::name() const {
|
||||
const char* names[number_of_registers] = {
|
||||
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "R1_SPEFSCR", "SR_PPR"
|
||||
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "SR_SPEFSCR", "SR_PPR"
|
||||
};
|
||||
return is_valid() ? names[encoding()] : "snoreg";
|
||||
}
|
||||
|
@ -60,8 +60,8 @@ typedef VMRegImpl* VMReg;
|
||||
// FPSCR Floating point status and control register (volatile)
|
||||
//
|
||||
// CR0-CR1 Condition code fields (volatile)
|
||||
// CR2-CCR4 Condition code fields (nonvolatile)
|
||||
// CCR5-CCR7 Condition code fields (volatile)
|
||||
// CR2-CR4 Condition code fields (nonvolatile)
|
||||
// CR5-CR7 Condition code fields (volatile)
|
||||
//
|
||||
// ----------------------------------------------
|
||||
// On processors with the VMX feature:
|
||||
@ -531,7 +531,7 @@ REGISTER_DECLARATION(Register, R7_ARG5, R7); // volatile
|
||||
REGISTER_DECLARATION(Register, R8_ARG6, R8); // volatile
|
||||
REGISTER_DECLARATION(Register, R9_ARG7, R9); // volatile
|
||||
REGISTER_DECLARATION(Register, R10_ARG8, R10); // volatile
|
||||
REGISTER_DECLARATION(FloatRegister, FO_SCRATCH, F0); // volatile
|
||||
REGISTER_DECLARATION(FloatRegister, F0_SCRATCH, F0); // volatile
|
||||
REGISTER_DECLARATION(FloatRegister, F1_RET, F1); // volatile
|
||||
REGISTER_DECLARATION(FloatRegister, F1_ARG1, F1); // volatile
|
||||
REGISTER_DECLARATION(FloatRegister, F2_ARG2, F2); // volatile
|
||||
@ -560,7 +560,7 @@ REGISTER_DECLARATION(FloatRegister, F13_ARG13, F13); // volatile
|
||||
#define R8_ARG6 AS_REGISTER(Register, R8)
|
||||
#define R9_ARG7 AS_REGISTER(Register, R9)
|
||||
#define R10_ARG8 AS_REGISTER(Register, R10)
|
||||
#define FO_SCRATCH AS_REGISTER(FloatRegister, F0)
|
||||
#define F0_SCRATCH AS_REGISTER(FloatRegister, F0)
|
||||
#define F1_RET AS_REGISTER(FloatRegister, F1)
|
||||
#define F1_ARG1 AS_REGISTER(FloatRegister, F1)
|
||||
#define F2_ARG2 AS_REGISTER(FloatRegister, F2)
|
||||
@ -608,7 +608,6 @@ REGISTER_DECLARATION(Register, R26_tmp6, R26);
|
||||
REGISTER_DECLARATION(Register, R27_tmp7, R27);
|
||||
REGISTER_DECLARATION(Register, R28_tmp8, R28);
|
||||
REGISTER_DECLARATION(Register, R29_tmp9, R29);
|
||||
REGISTER_DECLARATION(Register, R30_polling_page, R30);
|
||||
#ifndef DONT_USE_REGISTER_DEFINES
|
||||
#define R21_tmp1 AS_REGISTER(Register, R21)
|
||||
#define R22_tmp2 AS_REGISTER(Register, R22)
|
||||
@ -619,7 +618,6 @@ REGISTER_DECLARATION(Register, R30_polling_page, R30);
|
||||
#define R27_tmp7 AS_REGISTER(Register, R27)
|
||||
#define R28_tmp8 AS_REGISTER(Register, R28)
|
||||
#define R29_tmp9 AS_REGISTER(Register, R29)
|
||||
#define R30_polling_page AS_REGISTER(Register, R30)
|
||||
|
||||
#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
|
||||
#endif
|
||||
|
183 hotspot/src/cpu/ppc/vm/runtime_ppc.cpp (new file)
@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2012, 2013 SAP AG. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#ifdef COMPILER2
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "asm/macroAssembler.inline.hpp"
|
||||
#include "classfile/systemDictionary.hpp"
|
||||
#include "code/vmreg.hpp"
|
||||
#include "interpreter/interpreter.hpp"
|
||||
#include "nativeInst_ppc.hpp"
|
||||
#include "opto/runtime.hpp"
|
||||
#include "runtime/interfaceSupport.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "runtime/vframeArray.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "vmreg_ppc.inline.hpp"
|
||||
#endif
|
||||
|
||||
#define __ masm->
|
||||
|
||||
|
||||
#ifdef COMPILER2
|
||||
|
||||
// SP adjustment (must use unextended SP) for method handle call sites
|
||||
// during exception handling.
|
||||
static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
|
||||
RegisterMap map(thread, false);
|
||||
// The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
|
||||
frame mh_caller_frame = thread->last_frame().sender(&map);
|
||||
assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
|
||||
return (intptr_t) mh_caller_frame.unextended_sp();
|
||||
}
|
||||
|
||||
//------------------------------generate_exception_blob---------------------------
|
||||
// Creates exception blob at the end.
|
||||
// Using exception blob, this code is jumped from a compiled method.
|
||||
//
|
||||
// Given an exception pc at a call, we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers), unwind the frame, and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
|
||||
//
|
||||
// This code is entered with a jmp.
|
||||
//
|
||||
// Arguments:
|
||||
// R3_ARG1: exception oop
|
||||
// R4_ARG2: exception pc
|
||||
//
|
||||
// Results:
|
||||
// R3_ARG1: exception oop
|
||||
// R4_ARG2: exception pc in caller
|
||||
// destination: exception handler of caller
|
||||
//
|
||||
// Note: the exception pc MUST be at a call (precise debug information)
|
||||
//
|
||||
void OptoRuntime::generate_exception_blob() {
|
||||
// Allocate space for the code.
|
||||
ResourceMark rm;
|
||||
// Setup code generation tools.
|
||||
CodeBuffer buffer("exception_blob", 2048, 1024);
|
||||
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
|
||||
|
||||
address start = __ pc();
|
||||
|
||||
int frame_size_in_bytes = frame::abi_112_size;
|
||||
OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
|
||||
|
||||
// Exception pc is 'return address' for stack walker.
|
||||
__ std(R4_ARG2/*exception pc*/, _abi(lr), R1_SP);
|
||||
|
||||
// Store the exception in the Thread object.
|
||||
__ std(R3_ARG1/*exception oop*/, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
|
||||
__ std(R4_ARG2/*exception pc*/, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
|
||||
|
||||
// Save callee-saved registers.
|
||||
// Push a C frame for the exception blob. It is needed for the C call later on.
|
||||
__ push_frame_abi112(0, R11_scratch1);
|
||||
|
||||
// This call does all the hard work. It checks if an exception handler
|
||||
// exists in the method.
|
||||
// If so, it returns the handler address.
|
||||
// If not, it prepares for stack-unwinding, restoring the callee-save
|
||||
// registers of the frame being removed.
|
||||
__ set_last_Java_frame(/*sp=*/R1_SP, noreg);
|
||||
|
||||
__ mr(R3_ARG1, R16_thread);
|
||||
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, OptoRuntime::handle_exception_C),
|
||||
relocInfo::none);
|
||||
address calls_return_pc = __ last_calls_return_pc();
|
||||
# ifdef ASSERT
|
||||
__ cmpdi(CCR0, R3_RET, 0);
|
||||
__ asm_assert_ne("handle_exception_C must not return NULL", 0x601);
|
||||
# endif
|
||||
|
||||
// Set an oopmap for the call site. This oopmap will only be used if we
|
||||
// are unwinding the stack. Hence, all locations will be dead.
|
||||
// Callee-saved registers will be the same as the frame above (i.e.,
|
||||
// handle_exception_stub), since they were restored when we got the
|
||||
// exception.
|
||||
OopMapSet* oop_maps = new OopMapSet();
|
||||
oop_maps->add_gc_map(calls_return_pc - start, map);
|
||||
|
||||
// Get unextended_sp for method handle call sites.
|
||||
Label mh_callsite, mh_done; // Use a 2nd c call if it's a method handle call site.
|
||||
__ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
|
||||
__ cmpwi(CCR0, R4_ARG2, 0);
|
||||
__ bne(CCR0, mh_callsite);
|
||||
|
||||
__ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
|
||||
__ reset_last_Java_frame();
|
||||
__ pop_frame();
|
||||
|
||||
__ bind(mh_done);
|
||||
// We have a handler in register SR_CTR (could be deopt blob).
|
||||
|
||||
// Get the exception oop.
|
||||
__ ld(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
|
||||
|
||||
// Get the exception pc in case we are deoptimized.
|
||||
__ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
|
||||
|
||||
// Reset thread values.
|
||||
__ li(R0, 0);
|
||||
#ifdef ASSERT
|
||||
__ std(R0, in_bytes(JavaThread::exception_handler_pc_offset()), R16_thread);
|
||||
__ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
|
||||
#endif
|
||||
// Clear the exception oop so GC no longer processes it as a root.
|
||||
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
|
||||
|
||||
// Move exception pc into SR_LR.
|
||||
__ mtlr(R4_ARG2);
|
||||
__ bctr();
|
||||
|
||||
|
||||
// Same as above, but also set sp to unextended_sp.
|
||||
__ bind(mh_callsite);
|
||||
__ mr(R31, R3_RET); // Save branch address.
|
||||
__ mr(R3_ARG1, R16_thread);
|
||||
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
|
||||
// Returns unextended_sp in R3_RET.
|
||||
|
||||
__ mtctr(R31); // Move address of exception handler to SR_CTR.
|
||||
__ reset_last_Java_frame();
|
||||
|
||||
__ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
|
||||
__ b(mh_done);
|
||||
|
||||
|
||||
// Make sure all code is generated.
|
||||
masm->flush();
|
||||
|
||||
// Set exception blob.
|
||||
_exception_blob = ExceptionBlob::create(&buffer, oop_maps,
|
||||
frame_size_in_bytes/wordSize);
|
||||
}
|
||||
|
||||
#endif // COMPILER2
|
@ -687,17 +687,9 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
||||
F13->as_VMReg()
|
||||
};
|
||||
|
||||
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
|
||||
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
|
||||
|
||||
// The first 8 arguments are not passed on the stack.
|
||||
const int num_args_in_regs = 8;
|
||||
#define put_arg_in_reg(arg) ((arg) < num_args_in_regs)
|
||||
|
||||
// Check calling conventions consistency.
|
||||
assert(num_iarg_registers == num_args_in_regs
|
||||
&& num_iarg_registers == 8
|
||||
&& num_farg_registers == 13,
|
||||
assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
|
||||
sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
|
||||
"consistency");
|
||||
|
||||
// `Stk' counts stack slots. Due to alignment, 32 bit values occupy
|
||||
@ -705,8 +697,6 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
||||
const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats
|
||||
const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
|
||||
|
||||
int ill_i = 0;
|
||||
int ill_t = 0;
|
||||
int i;
|
||||
VMReg reg;
|
||||
// Leave room for C-compatible ABI_112.
|
||||
@ -726,6 +716,11 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
||||
if (regs2 != NULL) regs2[i].set_bad();
|
||||
|
||||
switch(sig_bt[i]) {
|
||||
|
||||
//
|
||||
// If arguments 0-7 are integers, they are passed in integer registers.
|
||||
// Argument i is placed in iarg_reg[i].
|
||||
//
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
@ -754,7 +749,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
||||
case T_ADDRESS:
|
||||
case T_METADATA:
|
||||
// Oops are already boxed if required (JNI).
|
||||
if (put_arg_in_reg(arg)) {
|
||||
if (arg < Argument::n_int_register_parameters_c) {
|
||||
reg = iarg_reg[arg];
|
||||
} else {
|
||||
reg = VMRegImpl::stack2reg(stk);
|
||||
@ -762,57 +757,66 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
||||
}
|
||||
regs[i].set2(reg);
|
||||
break;
|
||||
|
||||
//
|
||||
// Floats are treated differently from int regs: The first 13 float arguments
|
||||
// are passed in registers (not the float args among the first 13 args).
|
||||
// Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
|
||||
// in farg_reg[j] if argument i is the j-th float argument of this call.
|
||||
//
|
||||
case T_FLOAT:
|
||||
if (put_arg_in_reg(arg)) {
|
||||
if (freg < Argument::n_float_register_parameters_c) {
|
||||
// Put float in register ...
|
||||
reg = farg_reg[freg];
|
||||
++freg;
|
||||
|
||||
// Argument i for i > 8 is placed on the stack even if it's
|
||||
// placed in a register (if it's a float arg). Aix disassembly
|
||||
// shows that xlC places these float args on the stack AND in
|
||||
// a register. This is not documented, but we follow this
|
||||
// convention, too.
|
||||
if (arg >= Argument::n_regs_not_on_stack_c) {
|
||||
// ... and on the stack.
|
||||
guarantee(regs2 != NULL, "must pass float in register and stack slot");
|
||||
VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
|
||||
regs2[i].set1(reg2);
|
||||
stk += inc_stk_for_intfloat;
|
||||
}
|
||||
|
||||
} else {
|
||||
// Put float on stack
|
||||
# if defined(LINUX)
|
||||
reg = VMRegImpl::stack2reg(stk+1);
|
||||
# elif defined(AIX)
|
||||
reg = VMRegImpl::stack2reg(stk);
|
||||
# else
|
||||
# error "unknown OS"
|
||||
# endif
|
||||
// Put float on stack.
|
||||
reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
|
||||
stk += inc_stk_for_intfloat;
|
||||
}
|
||||
|
||||
if (freg < num_farg_registers) {
|
||||
// There are still some float argument registers left. Put the
|
||||
// float in a register if not already done.
|
||||
if (reg != farg_reg[freg]) {
|
||||
guarantee(regs2 != NULL, "must pass float in register and stack slot");
|
||||
VMReg reg2 = farg_reg[freg];
|
||||
regs2[i].set1(reg2);
|
||||
}
|
||||
++freg;
|
||||
}
|
||||
|
||||
regs[i].set1(reg);
|
||||
break;
|
||||
case T_DOUBLE:
|
||||
assert(sig_bt[i+1] == T_VOID, "expecting half");
|
||||
if (put_arg_in_reg(arg)) {
|
||||
if (freg < Argument::n_float_register_parameters_c) {
|
||||
// Put double in register ...
|
||||
reg = farg_reg[freg];
|
||||
++freg;
|
||||
|
||||
// Argument i for i > 8 is placed on the stack even if it's
|
||||
// placed in a register (if it's a double arg). Aix disassembly
|
||||
// shows that xlC places these float args on the stack AND in
|
||||
// a register. This is not documented, but we follow this
|
||||
// convention, too.
|
||||
if (arg >= Argument::n_regs_not_on_stack_c) {
|
||||
// ... and on the stack.
|
||||
guarantee(regs2 != NULL, "must pass float in register and stack slot");
|
||||
VMReg reg2 = VMRegImpl::stack2reg(stk);
|
||||
regs2[i].set2(reg2);
|
||||
stk += inc_stk_for_longdouble;
|
||||
}
|
||||
} else {
|
||||
// Put double on stack.
|
||||
reg = VMRegImpl::stack2reg(stk);
|
||||
stk += inc_stk_for_longdouble;
|
||||
}
|
||||
|
||||
if (freg < num_farg_registers) {
|
||||
// There are still some float argument registers left. Put the
|
||||
// float in a register if not already done.
|
||||
if (reg != farg_reg[freg]) {
|
||||
guarantee(regs2 != NULL, "must pass float in register and stack slot");
|
||||
VMReg reg2 = farg_reg[freg];
|
||||
regs2[i].set2(reg2);
|
||||
}
|
||||
++freg;
|
||||
}
|
||||
|
||||
regs[i].set2(reg);
|
||||
break;
|
||||
|
||||
case T_VOID:
|
||||
// Do not count halves.
|
||||
regs[i].set_bad();
|
||||
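A worked example of the rules above may help; it only restates what the code does, for a hypothetical native signature f(long a0..a7, float f8, double d9):

// a0..a7 : R3_ARG1 .. R10_ARG8   - integer args at positions 0..7 go in the int registers.
// f8     : F1_ARG1 + stack slot  - first float arg, so farg_reg[0]; its position (8) is
//                                  >= Argument::n_regs_not_on_stack_c, so it also gets a
//                                  stack slot (on Linux in the second word of the slot).
// d9     : F2_ARG2 + stack slot  - second float arg, same rule, a full doubleword on the stack.
// T_VOID halves of longs and doubles are skipped; they count as neither int nor float args.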
@ -877,7 +881,7 @@ static address gen_c2i_adapter(MacroAssembler *masm,
|
||||
__ mtlr(return_pc);
|
||||
|
||||
|
||||
// call the interpreter
|
||||
// Call the interpreter.
|
||||
__ BIND(call_interpreter);
|
||||
__ mtctr(ientry);
|
||||
|
||||
@ -947,8 +951,12 @@ static address gen_c2i_adapter(MacroAssembler *masm,
|
||||
|
||||
// Jump to the interpreter just as if interpreter was doing it.
|
||||
|
||||
#ifdef CC_INTERP
|
||||
const Register tos = R17_tos;
|
||||
#endif
|
||||
|
||||
// load TOS
|
||||
__ addi(R17_tos, R1_SP, st_off);
|
||||
__ addi(tos, R1_SP, st_off);
|
||||
|
||||
// Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
|
||||
assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
|
||||
@ -982,7 +990,9 @@ static void gen_i2c_adapter(MacroAssembler *masm,
|
||||
// save code can segv when fxsave instructions find improperly
|
||||
// aligned stack pointer.
|
||||
|
||||
#ifdef CC_INTERP
|
||||
const Register ld_ptr = R17_tos;
|
||||
#endif
|
||||
const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
|
||||
const int num_value_regs = sizeof(value_regs) / sizeof(Register);
|
||||
int value_regs_index = 0;
|
||||
@ -1137,7 +1147,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
|
||||
__ bne_predict_taken(CCR0, valid);
|
||||
// We have a null argument, branch to ic_miss_stub.
|
||||
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
|
||||
relocInfo::runtime_call_type);
|
||||
relocInfo::runtime_call_type);
|
||||
__ BIND(valid);
|
||||
}
|
||||
}
|
||||
@ -1154,7 +1164,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
|
||||
__ beq_predict_taken(CCR0, valid);
|
||||
// We have an unexpected klass, branch to ic_miss_stub.
|
||||
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
|
||||
relocInfo::runtime_call_type);
|
||||
relocInfo::runtime_call_type);
|
||||
__ BIND(valid);
|
||||
}
|
||||
|
||||
@ -1170,8 +1180,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
|
||||
__ beq_predict_taken(CCR0, call_interpreter);
|
||||
|
||||
// Branch to ic_miss_stub.
|
||||
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
|
||||
relocInfo::runtime_call_type);
|
||||
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
|
||||
|
||||
// entry: c2i
|
||||
|
||||
@ -2594,7 +2603,11 @@ static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
|
||||
__ ld(frame_size_reg, 0, frame_sizes_reg);
|
||||
__ std(pc_reg, _abi(lr), R1_SP);
|
||||
__ push_frame(frame_size_reg, R0/*tmp*/);
|
||||
#ifdef CC_INTERP
|
||||
__ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
|
||||
#else
|
||||
Unimplemented();
|
||||
#endif
|
||||
__ addi(number_of_frames_reg, number_of_frames_reg, -1);
|
||||
__ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
|
||||
__ addi(pcs_reg, pcs_reg, wordSize);
|
||||
@ -2693,7 +2706,9 @@ static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
|
||||
// Store it in the top interpreter frame.
|
||||
__ std(R0, _abi(lr), R1_SP);
|
||||
// Initialize frame_manager_lr of interpreter top frame.
|
||||
#ifdef CC_INTERP
|
||||
__ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2886,8 +2901,7 @@ void SharedRuntime::generate_deopt_blob() {
|
||||
|
||||
// Initialize R14_state.
|
||||
__ ld(R14_state, 0, R1_SP);
|
||||
__ addi(R14_state, R14_state,
|
||||
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
// Also inititialize R15_prev_state.
|
||||
__ restore_prev_state();
|
||||
|
||||
@ -3010,8 +3024,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
|
||||
|
||||
// Initialize R14_state, ...
|
||||
__ ld(R11_scratch1, 0, R1_SP);
|
||||
__ addi(R14_state, R11_scratch1,
|
||||
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
__ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
|
||||
// also initialize R15_prev_state.
|
||||
__ restore_prev_state();
|
||||
// Return to the interpreter entry point.
|
||||
|
@ -146,14 +146,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// FIXME: use round_to() here
|
||||
__ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1);
|
||||
__ sldi(r_frame_alignment_in_bytes,
|
||||
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
|
||||
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
|
||||
|
||||
// size = unaligned size of arguments + top abi's size
|
||||
__ addi(r_frame_size, r_argument_size_in_bytes,
|
||||
frame::top_ijava_frame_abi_size);
|
||||
// size += arguments alignment
|
||||
__ add(r_frame_size,
|
||||
r_frame_size, r_frame_alignment_in_bytes);
|
||||
r_frame_size, r_frame_alignment_in_bytes);
|
||||
// size += size of call_stub locals
|
||||
__ addi(r_frame_size,
|
||||
r_frame_size, frame::entry_frame_locals_size);
|
||||
@ -179,7 +179,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ addi(r_top_of_arguments_addr,
|
||||
R1_SP, frame::top_ijava_frame_abi_size);
|
||||
__ add(r_top_of_arguments_addr,
|
||||
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
|
||||
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
|
||||
|
||||
// any arguments to copy?
|
||||
__ cmpdi(CCR0, r_arg_argument_count, 0);
|
||||
@ -229,22 +229,23 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
// Register state on entry to frame manager / native entry:
|
||||
//
|
||||
// R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
|
||||
// tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
|
||||
// R19_method - Method
|
||||
// R16_thread - JavaThread*
|
||||
|
||||
// R17_tos must point to last argument - element_size.
|
||||
__ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
|
||||
// Tos must point to last argument - element_size.
|
||||
const Register tos = R17_tos;
|
||||
__ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
|
||||
|
||||
// initialize call_stub locals (step 2)
|
||||
// now save R17_tos as arguments_tos_address
|
||||
__ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
|
||||
// now save tos as arguments_tos_address
|
||||
__ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
|
||||
|
||||
// load argument registers for call
|
||||
__ mr(R19_method, r_arg_method);
|
||||
__ mr(R16_thread, r_arg_thread);
|
||||
assert(R17_tos != r_arg_method, "trashed r_arg_method");
|
||||
assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
|
||||
assert(tos != r_arg_method, "trashed r_arg_method");
|
||||
assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
|
||||
|
||||
// Set R15_prev_state to 0 for simplifying checks in callee.
|
||||
__ li(R15_prev_state, 0);
|
||||
@ -274,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// Do a light-weight C-call here, r_new_arg_entry holds the address
|
||||
// of the interpreter entry point (frame manager or native entry)
|
||||
// and save runtime-value of LR in return_address.
|
||||
assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
|
||||
assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
|
||||
"trashed r_new_arg_entry");
|
||||
return_address = __ call_stub(r_new_arg_entry);
|
||||
}
|
||||
@ -326,8 +327,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT.
|
||||
__ cmpwi(CCR0, r_arg_result_type, T_OBJECT);
|
||||
__ cmpwi(CCR1, r_arg_result_type, T_LONG);
|
||||
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
|
||||
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
|
||||
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
|
||||
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
|
||||
|
||||
// restore non-volatile registers
|
||||
__ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
|
||||
@ -345,8 +346,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ beq(CCR0, ret_is_object);
|
||||
__ beq(CCR1, ret_is_long);
|
||||
__ beq(CCR5, ret_is_float);
|
||||
__ beq(CCR6, ret_is_double);
|
||||
__ beq(CCR5, ret_is_float);
|
||||
__ beq(CCR6, ret_is_double);
|
||||
|
||||
// default:
|
||||
__ stw(R3_RET, 0, r_arg_result_addr);
|
||||
@ -614,6 +615,17 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (!dest_uninitialized) {
|
||||
const int spill_slots = 4 * wordSize;
|
||||
const int frame_size = frame::abi_112_size + spill_slots;
|
||||
Label filtered;
|
||||
|
||||
// Is marking active?
|
||||
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
|
||||
__ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
|
||||
} else {
|
||||
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
|
||||
__ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
|
||||
}
|
||||
__ cmpdi(CCR0, Rtmp1, 0);
|
||||
__ beq(CCR0, filtered);
|
||||
|
||||
__ save_LR_CR(R0);
|
||||
__ push_frame_abi112(spill_slots, R0);
|
||||
@ -628,6 +640,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ ld(count, frame_size - 3 * wordSize, R1_SP);
|
||||
__ pop_frame();
|
||||
__ restore_LR_CR(R0);
|
||||
|
||||
__ bind(filtered);
|
||||
}
|
||||
break;
|
||||
case BarrierSet::CardTableModRef:
|
||||
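The "Is marking active?" block reads the per-thread SATB queue's active flag at its byte- or word-sized offset and skips the pre-barrier when it is zero. A C-level model of that load, as an illustration only (helper name is ours):

static bool satb_marking_active(const JavaThread* thread) {
  const char* flag = (const char*)thread +
      in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active());
  return (in_bytes(PtrQueue::byte_width_of_active()) == 4) ? (*(const int*)flag != 0)
                                                           : (*flag != 0);
}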
@ -648,21 +662,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
//
|
||||
// The input registers and R0 are overwritten.
|
||||
//
|
||||
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
|
||||
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
|
||||
BarrierSet* const bs = Universe::heap()->barrier_set();
|
||||
|
||||
switch (bs->kind()) {
|
||||
case BarrierSet::G1SATBCT:
|
||||
case BarrierSet::G1SATBCTLogging:
|
||||
{
|
||||
__ save_LR_CR(R0);
|
||||
// We need this frame only that the callee can spill LR/CR.
|
||||
__ push_frame_abi112(0, R0);
|
||||
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
|
||||
|
||||
__ pop_frame();
|
||||
__ restore_LR_CR(R0);
|
||||
if (branchToEnd) {
|
||||
__ save_LR_CR(R0);
|
||||
// We need this frame only to spill LR.
|
||||
__ push_frame_abi112(0, R0);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
|
||||
__ pop_frame();
|
||||
__ restore_LR_CR(R0);
|
||||
} else {
|
||||
// Tail call: fake call from stub caller by branching without linking.
|
||||
address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
|
||||
__ mr_if_needed(R3_ARG1, addr);
|
||||
__ mr_if_needed(R4_ARG2, count);
|
||||
__ load_const(R11, entry_point, R0);
|
||||
__ call_c_and_return_to_caller(R11);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BarrierSet::CardTableModRef:
|
||||
@ -697,9 +718,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ addi(addr, addr, 1);
|
||||
__ bdnz(Lstore_loop);
|
||||
__ bind(Lskip_loop);
|
||||
|
||||
if (!branchToEnd) __ blr();
|
||||
}
|
||||
break;
|
||||
case BarrierSet::ModRef:
|
||||
if (!branchToEnd) __ blr();
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -847,30 +871,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// The code is implemented (ported from SPARC) as we believe it benefits JVM98; however,
// tracing (-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all!
|
||||
//
|
||||
// Source code in function is_range_check_if() shows OptimizeFill relaxed the condition
|
||||
// Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition
|
||||
// for turning on loop predication optimization, and hence the behavior of "array range check"
|
||||
// and "loop invariant check" could be influenced, which potentially boosted JVM98.
|
||||
//
|
||||
// We leave the code here and see if Oracle has updates in later releases(later than HS20).
|
||||
//
|
||||
// Generate stub for disjoint short fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
// Generate stub for disjoint short fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
//
|
||||
// Arguments for generated stub:
|
||||
// to: R3_ARG1
|
||||
// value: R4_ARG2
|
||||
// count: R5_ARG3 treated as signed
|
||||
// to: R3_ARG1
|
||||
// value: R4_ARG2
|
||||
// count: R5_ARG3 treated as signed
|
||||
//
|
||||
address generate_fill(BasicType t, bool aligned, const char* name) {
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ emit_fd();
|
||||
|
||||
const Register to = R3_ARG1; // source array address
|
||||
const Register value = R4_ARG2; // fill value
|
||||
const Register count = R5_ARG3; // elements count
|
||||
const Register temp = R6_ARG4; // temp register
|
||||
const Register to = R3_ARG1; // source array address
|
||||
const Register value = R4_ARG2; // fill value
|
||||
const Register count = R5_ARG3; // elements count
|
||||
const Register temp = R6_ARG4; // temp register
|
||||
|
||||
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
|
||||
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
|
||||
|
||||
Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
|
||||
Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
|
||||
@ -879,31 +901,31 @@ class StubGenerator: public StubCodeGenerator {
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
shift = 2;
|
||||
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
|
||||
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
|
||||
__ rldimi(value, value, 8, 48); // 8 bit -> 16 bit
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
|
||||
__ blt(CCR0, L_fill_elements);
|
||||
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
|
||||
break;
|
||||
case T_SHORT:
|
||||
shift = 1;
|
||||
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
|
||||
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
|
||||
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
|
||||
__ blt(CCR0, L_fill_elements);
|
||||
break;
|
||||
case T_INT:
|
||||
shift = 0;
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
|
||||
__ blt(CCR0, L_fill_4_bytes);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
if (!aligned && (t == T_BYTE || t == T_SHORT)) {
|
||||
// align source address at 4 bytes address boundary
|
||||
// Align source address at 4 bytes address boundary.
|
||||
if (t == T_BYTE) {
|
||||
// One byte misalignment happens only for byte arrays
|
||||
// One byte misalignment happens only for byte arrays.
|
||||
__ andi_(temp, to, 1);
|
||||
__ beq(CCR0, L_skip_align1);
|
||||
__ stb(value, 0, to);
|
||||
@ -930,12 +952,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ bind(L_fill_32_bytes);
|
||||
}
|
||||
|
||||
__ li(temp, 8<<shift); // prepare for 32 byte loop
|
||||
// clone bytes int->long as above
|
||||
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
|
||||
__ li(temp, 8<<shift); // Prepare for 32 byte loop.
|
||||
// Clone bytes int->long as above.
|
||||
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
|
||||
|
||||
Label L_check_fill_8_bytes;
|
||||
// Fill 32-byte chunks
|
||||
// Fill 32-byte chunks.
|
||||
__ subf_(count, temp, count);
|
||||
__ blt(CCR0, L_check_fill_8_bytes);
|
||||
|
||||
@ -945,7 +967,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ std(value, 0, to);
|
||||
__ std(value, 8, to);
|
||||
__ subf_(count, temp, count); // update count
|
||||
__ subf_(count, temp, count); // Update count.
|
||||
__ std(value, 16, to);
|
||||
__ std(value, 24, to);
|
||||
|
||||
@ -968,7 +990,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ addi(to, to, 8);
|
||||
__ bge(CCR0, L_fill_8_bytes_loop);
|
||||
|
||||
// fill trailing 4 bytes
|
||||
// Fill trailing 4 bytes.
|
||||
__ bind(L_fill_4_bytes);
|
||||
__ andi_(temp, count, 1<<shift);
|
||||
__ beq(CCR0, L_fill_2_bytes);
|
||||
@ -976,14 +998,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ stw(value, 0, to);
|
||||
if (t == T_BYTE || t == T_SHORT) {
|
||||
__ addi(to, to, 4);
|
||||
// fill trailing 2 bytes
|
||||
// Fill trailing 2 bytes.
|
||||
__ bind(L_fill_2_bytes);
|
||||
__ andi_(temp, count, 1<<(shift-1));
|
||||
__ beq(CCR0, L_fill_byte);
|
||||
__ sth(value, 0, to);
|
||||
if (t == T_BYTE) {
|
||||
__ addi(to, to, 2);
|
||||
// fill trailing byte
|
||||
// Fill trailing byte.
|
||||
__ bind(L_fill_byte);
|
||||
__ andi_(count, count, 1);
|
||||
__ beq(CCR0, L_exit);
|
||||
@ -997,7 +1019,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ bind(L_exit);
|
||||
__ blr();
|
||||
|
||||
// Handle copies less than 8 bytes. Int is handled elsewhere.
|
||||
// Handle copies less than 8 bytes. Int is handled elsewhere.
|
||||
if (t == T_BYTE) {
|
||||
__ bind(L_fill_elements);
|
||||
Label L_fill_2, L_fill_4;
|
||||
@ -1039,7 +1061,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
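
For reference, the value cloning that the rldimi sequences in generate_fill() perform can be written in plain C++ as below. This is only an illustrative sketch; the helper name clone_fill_value and its log2_element_size parameter are made up here (and are not the same thing as the stub's `shift` variable).

#include <cstdint>
#include <cstdio>

// Replicate an 8/16/32-bit fill value across a 64-bit word, mirroring the
// "8 bit -> 16 bit -> 32 bit -> 64 bit" rldimi steps in generate_fill().
static uint64_t clone_fill_value(uint64_t value, int log2_element_size) {
  if (log2_element_size == 0) value = (value & 0xff) | ((value & 0xff) << 8);      // 8 bit  -> 16 bit
  if (log2_element_size <= 1) value = (value & 0xffff) | ((value & 0xffff) << 16); // 16 bit -> 32 bit
  return (value & 0xffffffffULL) | (value << 32);                                  // 32 bit -> 64 bit
}

int main() {
  printf("%016llx\n", (unsigned long long)clone_fill_value(0xAB, 0));   // abababababababab
  printf("%016llx\n", (unsigned long long)clone_fill_value(0xABCD, 1)); // abcdabcdabcdabcd
  return 0;
}

Once the value is replicated to 64 bits, the stub can store it with std in 32-byte chunks; short arrays take the element-wise path instead.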

// Generate overlap test for array copy stubs
// Generate overlap test for array copy stubs.
//
// Input:
// R3_ARG1 - from
@ -1873,10 +1895,7 @@ class StubGenerator: public StubCodeGenerator {
generate_conjoint_long_copy_core(aligned);
}

gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);

__ blr();

gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}

@ -1906,9 +1925,7 @@ class StubGenerator: public StubCodeGenerator {
generate_disjoint_long_copy_core(aligned);
}

gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);

__ blr();
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);

return start;
}

@ -89,16 +89,17 @@ void VM_Version::initialize() {
}

// On Power6 test for section size.
if (PowerArchitecturePPC64 == 6)
if (PowerArchitecturePPC64 == 6) {
determine_section_size();
// TODO: PPC port else
// TODO: PPC port } else {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}

MaxVectorSize = 8;
#endif

// Create and print feature-string.
char buf[(num_features+1) * 16]; // max 16 chars per feature
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
@ -127,21 +128,21 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;

if (AllocatePrefetchStyle == 4) {
AllocatePrefetchStepSize = cache_line_size; // need exact value
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ?
AllocatePrefetchStepSize = cache_line_size; // Need exact value.
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
} else {
if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ?
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
}

assert(AllocatePrefetchLines > 0, "invalid value");
if (AllocatePrefetchLines < 1) // Set valid value in product VM.
AllocatePrefetchLines = 1; // Conservative value
AllocatePrefetchLines = 1; // Conservative value.

if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
AllocatePrefetchStyle = 1; // fall back if inappropriate
AllocatePrefetchStyle = 1; // Fall back if inappropriate.

assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
}
@ -160,13 +161,13 @@ void VM_Version::determine_section_size() {

const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;

// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_section_size", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);

uint32_t *code = (uint32_t *)a->pc();
// emit code.
// Emit code.
void (*test1)() = (void(*)())(void *)a->emit_fd();

Label l1;
@ -189,58 +190,58 @@ void VM_Version::determine_section_size() {

// ;; 1
a->nop(); // 5
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->endgroup(); // 8
// ------- sector 8 ------------

// ;; 2
a->nop(); // 9
a->nop(); // 10
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12

// ;; 3
a->nop(); // 13
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->endgroup(); // 16
// -------- sector 16 -------------

// ;; 4
a->nop(); // 17
a->nop(); // 18
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20

// ;; 5
a->nop(); // 21
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->endgroup(); // 24
// ------- sector 24 ------------

// ;; 6
a->nop(); // 25
a->nop(); // 26
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28

// ;; 7
a->nop(); // 29
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->brnop0(); // 32

// ------- sector 32 ------------
}

// ;; 8
a->cmpdi(CCR0, R4, unroll);// 33
a->bge(CCR0, l1); // 34
a->cmpdi(CCR0, R4, unroll); // 33
a->bge(CCR0, l1); // 34
a->blr();

// emit code.
// Emit code.
void (*test2)() = (void(*)())(void *)a->emit_fd();
// uint32_t *code = (uint32_t *)a->pc();

@ -382,39 +383,40 @@ void VM_Version::determine_section_size() {
#endif // COMPILER2

void VM_Version::determine_features() {
const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction)
// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (num_features+1+2*7)*BytesPerInstWord;
int features = 0;

// create test area
enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size)
enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
char test_area[BUFFER_SIZE];
char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];

// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_cpu_features", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);

// emit code.
// Emit code.
void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd();
uint32_t *code = (uint32_t *)a->pc();
// Don't use R0 in ldarx.
// keep R3_ARG1 = R3 unmodified, it contains &field (see below)
// keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below)
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
a->blr();

// Emit function to set one cache line to zero
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it
a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd();
a->dcbz(R3_ARG1); // R3_ARG1 = addr
a->blr();

uint32_t *code_end = (uint32_t *)a->pc();
@ -428,8 +430,8 @@ void VM_Version::determine_features() {
}

// Measure cache line size.
memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF
(*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle
memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
(*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
int count = 0; // count zeroed bytes
for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
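
The cache line measurement above can be sketched in ordinary C++ as follows; zero_one_cache_line stands in for the dcbz stub emitted above and is only a placeholder name, not part of the HotSpot sources.

#include <cstring>

// Sketch of the measurement: blast the buffer with 0xFF, let the dcbz stub
// zero exactly one data cache line in the middle, then count the zeroed bytes.
// That count is the cache line size and must be a power of two.
static int measure_cache_line_size(char* area, int size, void (*zero_one_cache_line)(char*)) {
  memset(area, 0xFF, size);              // Fill test area with a non-zero pattern.
  zero_one_cache_line(&area[size >> 1]); // dcbz on the middle of the buffer.
  int count = 0;
  for (int i = 0; i < size; i++) {
    if (area[i] == 0) count++;           // Count the bytes dcbz cleared.
  }
  return (count > 0 && (count & (count - 1)) == 0) ? count : -1;
}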

@ -113,7 +113,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error

__ ld_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
@ -147,7 +147,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {

// Entry arguments:
// R19_method: Interface
// R3_ARG1: Receiver
// R3_ARG1: Receiver
//

const Register rcvr_klass = R11_scratch1;

@ -34,114 +34,114 @@
//
// Machine barrier instructions:
//
// - ppc_sync Two-way memory barrier, aka fence.
// - ppc_lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - ppc_eieio orders Store|Store
// - ppc_isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - ppc_release orders Store|Store, (maps to ppc_lwsync)
// Load|Store
// - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
// Load|Load
// - ppc_fence orders Store|Store, (maps to ppc_sync)
// Load|Store,
// Load|Load,
// Store|Load
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//

#define inlasm_ppc_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_ppc_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_ppc_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_ppc_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_ppc_release() inlasm_ppc_lwsync();
#define inlasm_ppc_acquire() inlasm_ppc_lwsync();
#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_release() inlasm_lwsync();
#define inlasm_acquire() inlasm_lwsync();
// Use twi-isync for load_acquire (faster than lwsync).
// ATTENTION: seems like xlC 10.1 has problems with this inline assembler macro (VerifyMethodHandles found "bad vminfo in AMH.conv"):
// #define inlasm_ppc_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_ppc_acquire_reg(X) inlasm_ppc_lwsync();
#define inlasm_ppc_fence() inlasm_ppc_sync();
// #define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_acquire_reg(X) inlasm_lwsync();
#define inlasm_fence() inlasm_sync();

inline void OrderAccess::loadload() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storestore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storeload() { inlasm_ppc_fence(); }
inline void OrderAccess::loadload() { inlasm_lwsync(); }
inline void OrderAccess::storestore() { inlasm_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_lwsync(); }
inline void OrderAccess::storeload() { inlasm_fence(); }

inline void OrderAccess::acquire() { inlasm_ppc_acquire(); }
inline void OrderAccess::release() { inlasm_ppc_release(); }
inline void OrderAccess::fence() { inlasm_ppc_fence(); }
inline void OrderAccess::acquire() { inlasm_acquire(); }
inline void OrderAccess::release() { inlasm_release(); }
inline void OrderAccess::fence() { inlasm_fence(); }

inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_acquire_reg(t); return t; }
inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_ppc_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_ppc_acquire(); return t; }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; }

inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }

inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; }

inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; }

inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); }

inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); }

inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); }

inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); }

#undef inlasm_ppc_sync
#undef inlasm_ppc_lwsync
#undef inlasm_ppc_eieio
#undef inlasm_ppc_isync
#undef inlasm_ppc_release
#undef inlasm_ppc_acquire
#undef inlasm_ppc_fence
#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync
#undef inlasm_release
#undef inlasm_acquire
#undef inlasm_fence

#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP
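
As a usage illustration of the acquire/release-to-lwsync mapping documented in this file, the sketch below shows a release-store/load-acquire pairing written with the same barrier. It assumes a GCC-compatible compiler targeting PPC64 and is not part of the HotSpot sources; the function names are made up.

#include <cstdint>

#define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory")

static volatile int payload = 0;
static volatile int published = 0;

// Writer: release semantics, i.e. order the payload store before the flag store.
void publish(int v) {
  payload = v;
  inlasm_lwsync();   // release (orders Store|Store and Load|Store)
  published = 1;
}

// Reader: acquire semantics, i.e. order the flag load before the payload load.
int consume() {
  while (published == 0) { /* spin */ }
  inlasm_lwsync();   // acquire (orders Load|Load and Load|Store)
  return payload;
}

A storeload-ordered access would instead need the full sync fence, which is why OrderAccess::fence() maps to sync rather than lwsync.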

@ -67,7 +67,7 @@ address os::current_stack_pointer() {
address csp;

#if !defined(USE_XLC_BUILTINS)
// inline assembly for `ppc_mr regno(csp), PPC_SP':
// inline assembly for `mr regno(csp), R1_SP':
__asm__ __volatile__ ("mr %0, 1":"=r"(csp):);
#else
csp = (address) __builtin_frame_address(0);
@ -263,7 +263,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
goto report_and_die;
} else {
// this means a segv happened inside our stack, but not in
// This means a segv happened inside our stack, but not in
// the guarded zone. I'd like to know when this happens,
tty->print_raw_cr("SIGSEGV happened inside stack but outside yellow and red zone.");
goto report_and_die;
@ -312,53 +312,57 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}

// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
goto run_stub;
}

else if (sig == SIGSEGV && os::is_poll_address(addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
goto run_stub;
}

// SIGTRAP-based ic miss check in compiled code
// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
goto run_stub;
}

#ifdef COMPILER2
// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}
#endif

// SIGSEGV-based implicit null check in compiled code.
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}

@ -366,8 +370,9 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}

@ -58,35 +58,35 @@ inline jlong Atomic::load(volatile jlong* src) { return *src; }

- sync two-way memory barrier, aka fence
- lwsync orders Store|Store,
Load|Store,
Load|Load,
but not Store|Load
Load|Store,
Load|Load,
but not Store|Load
- eieio orders memory accesses for device memory (only)
- isync invalidates speculatively executed instructions
From the POWER ISA 2.06 documentation:
"[...] an isync instruction prevents the execution of
instructions following the isync until instructions
preceding the isync have completed, [...]"
From IBM's AIX assembler reference:
"The isync [...] instructions causes the processor to
refetch any instructions that might have been fetched
prior to the isync instruction. The instruction isync
causes the processor to wait for all previous instructions
to complete. Then any instructions already fetched are
discarded and instruction processing continues in the
environment established by the previous instructions."
From the POWER ISA 2.06 documentation:
"[...] an isync instruction prevents the execution of
instructions following the isync until instructions
preceding the isync have completed, [...]"
From IBM's AIX assembler reference:
"The isync [...] instructions causes the processor to
refetch any instructions that might have been fetched
prior to the isync instruction. The instruction isync
causes the processor to wait for all previous instructions
to complete. Then any instructions already fetched are
discarded and instruction processing continues in the
environment established by the previous instructions."

semantic barrier instructions:
(as defined in orderAccess.hpp)

- release orders Store|Store, (maps to lwsync)
Load|Store
Load|Store
- acquire orders Load|Store, (maps to lwsync)
Load|Load
Load|Load
- fence orders Store|Store, (maps to sync)
Load|Store,
Load|Load,
Store|Load
Load|Store,
Load|Load,
Store|Load
*/

#define strasm_sync "\n sync \n"
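
Strings like strasm_sync are pasted into inline-assembly templates in this file; a typical shape is the reservation loop below, bracketed by sync and isync to give an atomic add full-fence semantics. This is only an illustrative sketch for a GCC PPC64 target, not a verbatim copy of the HotSpot implementation.

// Illustrative sketch (GCC, PPC64): an atomic add bracketed by sync/isync,
// the pattern the strasm_* strings above are spliced into.
inline int add_and_fetch_with_fence(volatile int* dest, int add_value) {
  int result;
  __asm__ __volatile__ (
    "   sync               \n"   // full barrier before the update
    "1: lwarx   %0, 0, %2  \n"   // load word and reserve
    "   add     %0, %0, %1 \n"
    "   stwcx.  %0, 0, %2  \n"   // store conditional; fails if the reservation was lost
    "   bne-    1b         \n"   // retry until the store succeeds
    "   isync              \n"   // acquire-style barrier after the update
    : "=&r" (result)
    : "r" (add_value), "b" (dest)
    : "cc", "memory");
  return result;
}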

@ -40,26 +40,26 @@
//
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
// Load|Store,
// Load|Load,
// Store|Load
//

#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");

@ -284,16 +284,18 @@ JVM_handle_linux_signal(int sig,
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}

// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
}

@ -304,24 +306,27 @@ JVM_handle_linux_signal(int sig,
// (address)info->si_addr == os::get_standard_polling_page()
// doesn't work for us. We use:
((NativeInstruction*)pc)->is_safepoint_poll()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
}

// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
}

// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}

@ -329,8 +334,9 @@ JVM_handle_linux_signal(int sig,
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}

@ -338,8 +344,9 @@ JVM_handle_linux_signal(int sig,
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
#endif
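
To make the dispatch idea in these signal handlers concrete, here is a minimal, self-contained user-space sketch (Linux, POSIX): the handler checks si_addr against a known protected page, much as the VM handler checks for the polling page, and resumes at a safe point instead of dying. All names below are illustrative and nothing in this sketch is part of the HotSpot sources.

#include <csetjmp>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <sys/mman.h>

static sigjmp_buf resume_point;
static void* poll_page = nullptr;

static void handler(int sig, siginfo_t* info, void* /*ucontext*/) {
  if (sig == SIGSEGV && info->si_addr == poll_page) {
    siglongjmp(resume_point, 1);          // Expected fault: continue at a known point (the "stub").
  }
  _Exit(1);                               // Unexpected fault: report and die.
}

int main() {
  poll_page = mmap(nullptr, 4096, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  struct sigaction sa;
  sa.sa_sigaction = handler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  if (sigsetjmp(resume_point, 1) == 0) {
    *(volatile char*)poll_page;           // Touch the protected page -> SIGSEGV.
  } else {
    printf("handled expected fault at the poll page\n");
  }
  return 0;
}

The real handlers above go one step further: instead of longjmp-ing, they compute a continuation stub address and redirect the interrupted thread's program counter to it before returning from the signal.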