8029940: PPC64 (part 122): C2 compiler port

Reviewed-by: kvn
Goetz Lindenmaier 2013-12-11 00:06:11 +01:00
parent c50c083f83
commit 7d56518671
40 changed files with 13274 additions and 593 deletions

View File

@ -41,13 +41,11 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
ifeq ("${Platform_arch_model}", "${Platform_arch}")
SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad)
else
SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad)
endif
EXEC = $(OUTDIR)/adlc

View File

@ -98,7 +98,17 @@ class Argument VALUE_OBJ_CLASS_SPEC {
// Only 8 registers may contain integer parameters.
n_register_parameters = 8,
// Can have up to 8 floating registers.
n_float_register_parameters = 8
n_float_register_parameters = 8,
// PPC C calling conventions.
// The first eight arguments are passed in int regs if they are int.
n_int_register_parameters_c = 8,
// The first thirteen float arguments are passed in float regs.
n_float_register_parameters_c = 13,
// Only the first 8 parameters are not placed on the stack. Aix disassembly
// shows that xlC places all float args after argument 8 on the stack AND
// in a register. This is not documented, but we follow this convention, too.
n_regs_not_on_stack_c = 8,
};
// creation
Argument(int number) : _number(number) {}
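A standalone sketch (plain C++, not part of the port; the helper and the r3..r10 / f1..f13 register names are assumptions) of how the convention described above places a C argument:

#include <cstdio>

// i    : 0-based position among all parameters
// fidx : 0-based position among the float parameters (only meaningful if is_float)
static void print_placement(int i, int fidx, bool is_float) {
  const bool on_stack = (i >= 8);                    // only the first 8 parameters stay off the stack
  if (is_float && fidx < 13) {
    printf("arg %d: f%d%s\n", i, fidx + 1, on_stack ? " and a stack slot" : "");
  } else if (!is_float && i < 8) {
    printf("arg %d: r%d\n", i, i + 3);               // r3..r10 hold the first eight int args
  } else {
    printf("arg %d: stack slot\n", i);
  }
}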
@ -662,6 +672,14 @@ class Assembler : public AbstractAssembler {
bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken,
};
// Elemental Memory Barriers (>=Power 8)
enum Elemental_Membar_mask_bits {
StoreStore = 1 << 0,
StoreLoad = 1 << 1,
LoadStore = 1 << 2,
LoadLoad = 1 << 3
};
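// Hypothetical usage (not from this commit): an acquire-style elemental barrier
// ordering earlier loads against later loads and stores would be
//   elemental_membar(LoadLoad | LoadStore);  // mask 0b1100, encoded into sync via e1215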
// Branch prediction hints.
inline static int add_bhint_to_boint(const int bhint, const int boint) {
switch (boint) {
@ -753,17 +771,6 @@ class Assembler : public AbstractAssembler {
enum Predict { pt = 1, pn = 0 }; // pt = predict taken
enum Membar_mask_bits { // page 184, v9
StoreStore = 1 << 3,
LoadStore = 1 << 2,
StoreLoad = 1 << 1,
LoadLoad = 1 << 0,
Sync = 1 << 6,
MemIssue = 1 << 5,
Lookaside = 1 << 4
};
// instruction must start at passed address
static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
@ -875,19 +882,20 @@ class Assembler : public AbstractAssembler {
#define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
// Extract instruction fields from instruction words.
public:
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
// Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0.
// Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0.
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
#define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
#define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
@ -925,6 +933,7 @@ class Assembler : public AbstractAssembler {
static int l10( int x) { return opp_u_field(x, 10, 10); }
static int l15( int x) { return opp_u_field(x, 15, 15); }
static int l910( int x) { return opp_u_field(x, 10, 9); }
static int e1215( int x) { return opp_u_field(x, 15, 12); }
static int lev( int x) { return opp_u_field(x, 26, 20); }
static int li( int x) { return opp_s_field(x, 29, 6); }
static int lk( int x) { return opp_u_field(x, 31, 31); }
@ -960,13 +969,13 @@ class Assembler : public AbstractAssembler {
static int sr( int x) { return opp_u_field(x, 15, 12); }
static int tbr( int x) { return opp_u_field(x, 20, 11); }
static int th( int x) { return opp_u_field(x, 10, 7); }
static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); }
static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); }
static int thct( int x) { assert((x&8) == 0, "must be valid cache specification"); return th(x); }
static int thds( int x) { assert((x&8) == 8, "must be valid stream specification"); return th(x); }
static int to( int x) { return opp_u_field(x, 10, 6); }
static int u( int x) { return opp_u_field(x, 19, 16); }
static int ui( int x) { return opp_u_field(x, 31, 16); }
// support vector instructions for >= Power6
// Support vector instructions for >= Power6.
static int vra( int x) { return opp_u_field(x, 15, 11); }
static int vrb( int x) { return opp_u_field(x, 20, 16); }
static int vrc( int x) { return opp_u_field(x, 25, 21); }
@ -1090,8 +1099,8 @@ class Assembler : public AbstractAssembler {
inline void subfic( Register d, Register a, int si16);
inline void add( Register d, Register a, Register b);
inline void add_( Register d, Register a, Register b);
inline void subf( Register d, Register a, Register b);
inline void sub( Register d, Register a, Register b);
inline void subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec.
inline void sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability.
inline void subf_( Register d, Register a, Register b);
inline void addc( Register d, Register a, Register b);
inline void addc_( Register d, Register a, Register b);
@ -1204,7 +1213,7 @@ class Assembler : public AbstractAssembler {
}
// endgroup opcode for Power6
static bool is_endgroup(int x) {
return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
}
@ -1227,9 +1236,13 @@ class Assembler : public AbstractAssembler {
inline void cmpld( ConditionRegister crx, Register a, Register b);
inline void isel( Register d, Register a, Register b, int bc);
// Convenient version which takes: Condition register, Condition code and invert flag. Omit b to keep old value.
inline void isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg);
// Set d = 0 if (cr.cc) equals 1, otherwise b.
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
void andi( Register a, Register s, int ui16); // optimized version
void andi( Register a, Register s, int ui16); // optimized version
inline void andi_( Register a, Register s, int ui16);
inline void andis_( Register a, Register s, int ui16);
inline void ori( Register a, Register s, int ui16);
@ -1553,10 +1566,7 @@ class Assembler : public AbstractAssembler {
inline void ptesync();
inline void eieio();
inline void isync();
inline void release();
inline void acquire();
inline void fence();
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
@ -1938,7 +1948,7 @@ class Assembler : public AbstractAssembler {
inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
// Load a 64 bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16 bit immediate offset. This is useful if the offset can be encoded in
// a succeeding instruction.
int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);

View File

@ -224,8 +224,12 @@ inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) {
inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); }
// testbit with condition register.
inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) {
Assembler::rldicr(a, s, 63-ui6, 0);
Assembler::cmpdi(cr, a, 0);
if (cr == CCR0) {
Assembler::rldicr_(a, s, 63-ui6, 0);
} else {
Assembler::rldicr(a, s, 63-ui6, 0);
Assembler::cmpdi(cr, a, 0);
}
}
// rotate instructions
@ -423,6 +427,27 @@ inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE
inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
// Conditional move (>= Power7)
inline void Assembler::isel(Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b) {
if (b == noreg) {
b = d; // Can be omitted if old value should be kept in "else" case.
}
Register first = a;
Register second = b;
if (inv) {
first = b;
second = a; // exchange
}
assert(first != R0, "r0 not allowed");
isel(d, first, second, bi0(cr, cc));
}
inline void Assembler::isel_0(Register d, ConditionRegister cr, Condition cc, Register b) {
if (b == noreg) {
b = d; // Can be omitted if old value should be kept in "else" case.
}
isel(d, R0, b, bi0(cr, cc));
}
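// Usage sketch (hypothetical registers; assumes the usual Condition names):
//   isel(R5, CCR0, Assembler::equal, /*inv=*/false, R6, R7); // R5 = CR0.eq ? R6 : R7
//   isel_0(R5, CCR0, Assembler::less);                       // R5 = CR0.lt ? 0 : R5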
// PPC 2, section 3.2.1 Instruction Cache Instructions
inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); }
// PPC 2, section 3.2.2 Data Cache Instructions
@ -445,10 +470,7 @@ inline void Assembler::lwsync() { Assembler::sync(1); }
inline void Assembler::ptesync() { Assembler::sync(2); }
inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
inline void Assembler::release() { Assembler::lwsync(); }
inline void Assembler::acquire() { Assembler::lwsync(); }
inline void Assembler::fence() { Assembler::sync(); }
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
// atomics
// Use ra0mem to disallow R0 as base.
@ -767,7 +789,6 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp);
}

View File

@ -100,6 +100,7 @@ public:
#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
((VMJavaVal64*)(addr))->d)
#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_PP

View File

@ -33,7 +33,7 @@ class Bytes: AllStatic {
// Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
// PowerPC needs to check for alignment.
// can I count on address always being a pointer to an unsigned char? Yes
// Can I count on address always being a pointer to an unsigned char? Yes.
// Returns true if the byte ordering used by Java is different from the native byte ordering
// of the underlying machine. For example, true for Intel x86, false for Solaris on SPARC.
@ -141,7 +141,6 @@ class Bytes: AllStatic {
}
}
// Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
// (no byte-order reversal is needed since Power CPUs are big-endian oriented).
static inline u2 get_Java_u2(address p) { return get_native_u2(p); }

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C2_GLOBALS_PPC_HPP
#define CPU_PPC_VM_C2_GLOBALS_PPC_HPP
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
// Sets the default values for platform dependent flags used by the server compiler.
// (see c2_globals.hpp).
define_pd_global(bool, BackgroundCompilation, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, true);
define_pd_global(bool, UseOnStackReplacement, true);
define_pd_global(bool, ProfileInterpreter, true);
define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, BackEdgeThreshold, 140000);
define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 28);
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, INTPRESSURE, 25);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
// Peephole and CISC spilling both break the graph, and so make the
// scheduler sick.
define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
// GL:
// Detected a problem with unscaled compressed oops and
// narrow_oop_use_complex_address() == false.
// -Djava.io.tmpdir=./tmp -jar SPECjvm2008.jar -ikv -wt 3 -it 3
// -bt 1 --base compiler.sunflow
// fails in Lower.visitIf->translate->translate->translate and
// throws an unexpected NPE. A load and a store seem to be
// reordered. The Java code reads roughly:
// loc = x.f
// x.f = 0
// NullCheck loc
// While the generated assembly reads:
// x.f = 0
// loc = x.f
// NullCheck loc
define_pd_global(bool, OptoScheduling, false);
define_pd_global(intx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
define_pd_global(intx, ReservedCodeCacheSize, 256*M);
define_pd_global(intx, CodeCacheExpansionSize, 64*K);
// Ergonomics related flags
define_pd_global(uint64_t,MaxRAM, 4ULL*G);
define_pd_global(uintx, CodeCacheMinBlockLength, 4);
define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false);
// Heap related flags
define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
// Ergonomics related flags
define_pd_global(bool, NeverActAsServerClassMachine, false);
#endif // CPU_PPC_VM_C2_GLOBALS_PPC_HPP

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "runtime/globals.hpp"
#include "utilities/debug.hpp"
// processor dependent initialization for ppc
void Compile::pd_compiler2_init() {
// Power7 and later
if (PowerArchitecturePPC64 > 6) {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
FLAG_SET_ERGO(bool, UsePopCountInstruction, true);
}
}
if (PowerArchitecturePPC64 == 6) {
if (FLAG_IS_DEFAULT(InsertEndGroupPPC64)) {
FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
}
}
}

View File

@ -105,10 +105,12 @@ static void copy_conjoint_atomic(T* from, T* to, size_t count) {
}
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
// TODO: contribute optimized version.
copy_conjoint_atomic<jshort>(from, to, count);
}
static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
// TODO: contribute optimized version.
copy_conjoint_atomic<jint>(from, to, count);
}
@ -125,10 +127,12 @@ static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count
}
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
// TODO: contribute optimized version.
pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
}
static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
// TODO: contribute optimized version.
pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
}

View File

@ -1981,8 +1981,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
// Restore R14_state.
__ ld(R14_state, 0, R1_SP);
__ addi(R14_state, R14_state,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
//
// Registers alive

View File

@ -176,13 +176,14 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
Method* method = interpreter_frame_method();
BasicType type = method->result_type();
#ifdef CC_INTERP
if (method->is_native()) {
// Prior to calling into the runtime to notify the method exit the possible
// result value is saved into the interpreter frame.
#ifdef CC_INTERP
interpreterState istate = get_interpreterState();
address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset());
address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset());
#endif
switch (method->result_type()) {
case T_OBJECT:
@ -226,9 +227,6 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
default : ShouldNotReachHere();
}
}
#else
Unimplemented();
#endif
return type;
}

View File

@ -421,7 +421,7 @@
#ifdef CC_INTERP
// Additional interface for interpreter frames:
inline interpreterState get_interpreterState() const;
#endif
#endif // CC_INTERP
// Size of a monitor in bytes.
static int interpreter_frame_monitor_size_in_bytes();
@ -431,7 +431,6 @@
private:
// PPC port: permgen stuff
ConstantPoolCache** interpreter_frame_cpoolcache_addr() const;
public:

View File

@ -78,11 +78,8 @@ inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp)
// can distinguish identity and younger/older relationship. NULL
// represents an invalid (incomparable) frame.
inline intptr_t* frame::id(void) const {
// Use the _unextended_pc as the frame's ID. Because we have no
// adapters, but resized compiled frames, some of the new code
// (e.g. JVMTI) wouldn't work if we return the (current) SP of the
// frame.
return _unextended_sp;
// Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
return _fp;
}
// Return true if this frame is older (less recent activation) than

View File

@ -62,6 +62,13 @@ define_pd_global(uintx, TypeProfileLevel, 0);
// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
\
/* Load poll address from thread. This is used to implement per-thread */ \
/* safepoints on platforms != IA64. */ \
product(bool, LoadPollAddressFromThread, false, \
"Load polling page address from thread object (required for " \
"per-thread safepoints on platforms != IA64)") \
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power7. Default is 0. CPUs newer than Power7 will be " \
@ -88,6 +95,14 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \
"Use static branch prediction hints in CAS operations.") \
product(bool, UseStaticBranchPredictionForUncommonPathsPPC64, false, \
"Use static branch prediction hints for uncommon paths.") \
\
product(bool, UsePower6SchedulerPPC64, false, \
"Use Power6 Scheduler.") \
\
product(bool, InsertEndGroupPPC64, false, \
"Insert EndGroup instructions to optimize for Power6.") \
\
/* Trap based checks. */ \
/* Trap based checks use the ppc trap instructions to check certain */ \
@ -108,5 +123,4 @@ define_pd_global(uintx, TypeProfileLevel, 0);
" Use this to ease debugging.") \
#endif // CPU_PPC_VM_GLOBALS_PPC_HPP

View File

@ -28,17 +28,17 @@
#include "runtime/icache.hpp"
// Use inline assembler to implement icache flush.
int ppc64_flush_icache(address start, int lines, int magic){
int ICache::ppc64_flush_icache(address start, int lines, int magic) {
address end = start + (unsigned int)lines*ICache::line_size;
assert(start <= end, "flush_icache parms");
// store modified cache lines from data cache
for (address a=start; a<end; a+=ICache::line_size) {
for (address a = start; a < end; a += ICache::line_size) {
__asm__ __volatile__(
"dcbst 0, %0 \n"
:
: "r" (a)
: "memory");
"dcbst 0, %0 \n"
:
: "r" (a)
: "memory");
}
// sync instruction
@ -49,20 +49,20 @@ int ppc64_flush_icache(address start, int lines, int magic){
: "memory");
// invalidate respective cache lines in instruction cache
for (address a=start; a<end; a+=ICache::line_size) {
for (address a = start; a < end; a += ICache::line_size) {
__asm__ __volatile__(
"icbi 0, %0 \n"
:
: "r" (a)
: "memory");
"icbi 0, %0 \n"
:
: "r" (a)
: "memory");
}
// discard fetched instructions
__asm__ __volatile__(
"isync \n"
:
:
: "memory");
"isync \n"
:
:
: "memory");
return magic;
}
@ -70,7 +70,7 @@ int ppc64_flush_icache(address start, int lines, int magic){
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
StubCodeMark mark(this, "ICache", "flush_icache_stub");
*flush_icache_stub = (ICache::flush_icache_stub_t)ppc64_flush_icache;
*flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
// First call to flush itself
ICache::invalidate_range((address)(*flush_icache_stub), 0);

View File

@ -30,15 +30,23 @@
// code, part of the processor instruction cache potentially has to be flushed.
class ICache : public AbstractICache {
friend class ICacheStubGenerator;
static int ppc64_flush_icache(address start, int lines, int magic);
public:
enum {
// On PowerPC the cache line size is 32 bytes.
stub_size = 160, // Size of the icache flush stub in bytes.
line_size = 32, // Flush instruction affects 32 bytes.
log2_line_size = 5 // log2(line_size)
// Actually, cache line size is 64, but keeping it as it is to be
// on the safe side on ALL PPC64 implementations.
log2_line_size = 5,
line_size = 1 << log2_line_size
};
// Use default implementation
static void ppc64_flush_icache_bytes(address start, int bytes) {
// Align start address to an icache line boundary and transform
// nbytes to an icache line count.
const uint line_offset = mask_address_bits(start, line_size - 1);
ppc64_flush_icache(start - line_offset, (bytes + line_offset + line_size - 1) >> log2_line_size, 0);
}
};
#endif // CPU_PPC_VM_ICACHE_PPC_HPP
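For reference, a standalone sketch (plain C++, made-up sample values) of the alignment arithmetic used by ppc64_flush_icache_bytes:

#include <cstdint>

static int lines_to_flush(uintptr_t start, int bytes) {
  const int line_size = 32, log2_line_size = 5;            // values from the enum above
  const int line_offset = (int)(start & (line_size - 1));  // distance into the first line
  // Round the offset-adjusted byte count up to whole icache lines.
  return (bytes + line_offset + line_size - 1) >> log2_line_size;
}
// e.g. start = 0x1004, bytes = 40: offset 4, (40 + 4 + 31) >> 5 = 2 lines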

View File

@ -30,13 +30,21 @@
#include "interp_masm_ppc_64.hpp"
#include "interpreter/interpreterRuntime.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Register temp_reg) {
#ifdef CC_INTERP
address exception_entry = StubRoutines::throw_NullPointerException_at_call_entry();
#else
address exception_entry = Interpreter::throw_NullPointerException_entry();
#endif
MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
}
// Lock object
//
// Registers alive
@ -47,7 +55,7 @@
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
monitor, /*check_for_exceptions=*/false);
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
} else {
// template code:
//
@ -69,7 +77,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
const Register tmp = R10_ARG8;
Label done;
Label slow_case;
Label cas_failed, slow_case;
assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
@ -91,7 +99,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// Initialize the box (Must happen before we update the object mark!).
std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
BasicLock::displaced_header_offset_in_bytes(), monitor);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
@ -106,12 +114,14 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
/*compare_value=*/displaced_header, /*exchange_value=*/monitor,
/*where=*/object_mark_addr,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock());
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed);
// If the compare-and-exchange succeeded, then we found an unlocked
// object and we have now locked it.
beq(CCR0, done);
b(done);
bind(cas_failed);
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@ -134,7 +144,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
bne(CCR0, slow_case);
release();
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
BasicLock::displaced_header_offset_in_bytes(), monitor);
b(done);
@ -146,7 +156,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// slow case of monitor enter.
bind(slow_case);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
monitor, /*check_for_exceptions=*/false);
monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
// }
bind(done);
@ -160,7 +170,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// which must be initialized with the object to lock.
//
// Throw IllegalMonitorException if object is not locked by current thread.
void InterpreterMacroAssembler::unlock_object(Register monitor) {
void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
if (UseHeavyMonitors) {
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
monitor, /*check_for_exceptions=*/false);
@ -184,9 +194,8 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
const Register object_mark_addr = R9_ARG7;
const Register current_header = R10_ARG8;
Label no_recursive_unlock;
Label free_slot;
Label slow_case;
Label done;
assert_different_registers(object, displaced_header, object_mark_addr, current_header);
@ -194,7 +203,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// The object address from the monitor is in object.
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
biased_locking_exit(CCR0, object, displaced_header, done);
biased_locking_exit(CCR0, object, displaced_header, free_slot);
}
// Test first if we are in the fast recursive case.
@ -203,13 +212,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// If the displaced header is zero, we have a recursive unlock.
cmpdi(CCR0, displaced_header, 0);
bne(CCR0, no_recursive_unlock);
// Release in recursive unlock is not necessary.
// release();
std(displaced_header/*==0!*/, BasicObjectLock::obj_offset_in_bytes(), monitor);
b(done);
bind(no_recursive_unlock);
beq(CCR0, free_slot); // recursive unlock
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
@ -218,7 +221,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// If we still have a lightweight lock, unlock the object and be done.
// The object address from the monitor is in object.
ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
if (!UseBiasedLocking) ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
// We have the displaced header in displaced_header. If the lock is still
@ -229,17 +232,11 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
/*current_value=*/current_header,
/*compare_value=*/monitor, /*exchange_value=*/displaced_header,
/*where=*/object_mark_addr,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_release_lock());
bne(CCR0, slow_case);
// Exchange worked, do monitor->set_obj(NULL).
li(R0, 0);
// Must realease earlier (see cmpxchgd above).
// release();
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
b(done);
MacroAssembler::MemBarRel,
MacroAssembler::cmpxchgx_hint_release_lock(),
noreg,
&slow_case);
b(free_slot);
// } else {
// // Slow path.
@ -249,9 +246,17 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
// we need to get into the slow case.
bind(slow_case);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
monitor, /*check_for_exceptions=*/false);
monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
// }
Label done;
b(done); // Monitor register may be overwritten! Runtime has already freed the slot.
// Exchange worked, do monitor->set_obj(NULL);
align(32, 12);
bind(free_slot);
li(R0, 0);
std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
bind(done);
}
}
@ -375,6 +380,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
/*check_exceptions=*/false);
align(32, 12);
bind(jvmti_post_done);
}
}

View File

@ -37,6 +37,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
public:
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
void null_check_throw(Register a, int offset, Register temp_reg);
// Handy address generation macros
#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method
@ -51,15 +53,16 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Object locking
void lock_object (Register lock_reg, Register obj_reg);
void unlock_object(Register lock_reg);
void unlock_object(Register lock_reg, bool check_for_exceptions = true);
// Debugging
void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos
// support for jvmdi/jvmpi
void notify_method_entry();
void notify_method_exit(bool save_result, TosState state);
void notify_method_exit(bool is_native_method, TosState state);
#ifdef CC_INTERP
// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
// (using parent_frame_resize) and push a new interpreter
// TOP_IJAVA_FRAME (using frame_size).
@ -84,6 +87,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void pop_interpreter_state(bool prev_state_may_be_0);
void restore_prev_state();
#endif
};
#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP

View File

@ -396,18 +396,14 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type
//
Label done;
Label is_false;
address entry = __ pc();
switch (type) {
case T_BOOLEAN:
__ cmpwi(CCR0, R3_RET, 0);
__ beq(CCR0, is_false);
__ li(R3_RET, 1);
__ b(done);
__ bind(is_false);
__ li(R3_RET, 0);
// convert !=0 to 1
__ neg(R0, R3_RET);
__ orr(R0, R3_RET, R0);
__ srwi(R3_RET, R0, 31);
break;
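// (Branch-free: for any 32-bit x, (x | -x) has its sign bit set exactly when
//  x != 0, so the logical shift right by 31 yields 0 or 1.
//  E.g. x = 5: 5 | -5 = 0xFFFFFFFF, >> 31 = 1; x = 0: 0 | 0 = 0, >> 31 = 0.)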
case T_BYTE:
// sign extend 8 bits
@ -478,7 +474,7 @@ address InterpreterGenerator::generate_abstract_entry(void) {
// Push a new C frame and save LR.
__ save_LR_CR(R0);
__ push_frame_abi112_nonvolatiles(0, R11_scratch1);
__ push_frame_abi112(0, R11_scratch1);
// This is not a leaf but we have a JavaFrameAnchor now and we will
// check (create) exceptions afterward so this is ok.
@ -491,8 +487,12 @@ address InterpreterGenerator::generate_abstract_entry(void) {
// Reset JavaFrameAnchor from call_VM_leaf above.
__ reset_last_Java_frame();
#ifdef CC_INTERP
// Return to frame manager, it will handle the pending exception.
__ blr();
#else
Unimplemented();
#endif
return entry;
}
@ -503,16 +503,20 @@ address InterpreterGenerator::generate_accessor_entry(void) {
if(!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods)))
return NULL;
Label Ldone, Lslow_path;
Label Lslow_path, Lacquire;
const Register Rthis = R3_ARG1,
const Register
Rclass_or_obj = R3_ARG1,
Rconst_method = R4_ARG2,
Rcodes = Rconst_method,
Rcpool_cache = R5_ARG3,
Rscratch = R11_scratch1,
Rjvmti_mode = Rscratch,
Roffset = R12_scratch2,
Rflags = R6_ARG4;
Rflags = R6_ARG4,
Rbtable = R7_ARG5;
static address branch_table[number_of_states];
address entry = __ pc();
@ -521,13 +525,9 @@ address InterpreterGenerator::generate_accessor_entry(void) {
// Also check for JVMTI mode
// Check for null obj, take slow path if so.
#ifdef CC_INTERP
__ ld(Rthis, Interpreter::stackElementSize, R17_tos);
#else
Unimplemented()
#endif
__ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
__ lwz(Rjvmti_mode, thread_(interp_only_mode));
__ cmpdi(CCR1, Rthis, 0);
__ cmpdi(CCR1, Rclass_or_obj, 0);
__ cmpwi(CCR0, Rjvmti_mode, 0);
__ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
__ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0
@ -560,58 +560,127 @@ address InterpreterGenerator::generate_accessor_entry(void) {
__ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
__ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);
// Get field type.
// (Rflags>>ConstantPoolCacheEntry::tos_state_shift)&((1<<ConstantPoolCacheEntry::tos_state_bits)-1)
// Following code is from templateTable::getfield_or_static
// Load pointer to branch table
__ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
// Get volatile flag
__ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
// note: sync is needed before volatile load on PPC64
// Check field type
__ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
#ifdef ASSERT
__ ld(R9_ARG7, 0, R1_SP);
__ ld(R10_ARG8, 0, R21_sender_SP);
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
__ asm_assert_eq("backlink", 0x543);
Label LFlagInvalid;
__ cmpldi(CCR0, Rflags, number_of_states);
__ bge(CCR0, LFlagInvalid);
__ ld(R9_ARG7, 0, R1_SP);
__ ld(R10_ARG8, 0, R21_sender_SP);
__ cmpd(CCR0, R9_ARG7, R10_ARG8);
__ asm_assert_eq("backlink", 0x543);
#endif // ASSERT
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
// Load the return value according to field type.
Label Litos, Lltos, Lbtos, Lctos, Lstos;
__ cmpdi(CCR1, Rflags, itos);
__ cmpdi(CCR0, Rflags, ltos);
__ beq(CCR1, Litos);
__ beq(CCR0, Lltos);
__ cmpdi(CCR1, Rflags, btos);
__ cmpdi(CCR0, Rflags, ctos);
__ beq(CCR1, Lbtos);
__ beq(CCR0, Lctos);
__ cmpdi(CCR1, Rflags, stos);
__ beq(CCR1, Lstos);
// Load from branch table and dispatch (volatile case: one instruction ahead)
__ sldi(Rflags, Rflags, LogBytesPerWord);
__ cmpwi(CCR6, Rscratch, 1); // volatile?
__ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
__ ldx(Rbtable, Rbtable, Rflags);
__ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
__ mtctr(Rbtable);
__ bctr();
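// In effect (plain-C sketch, hypothetical names): each tos state has one slot in
// branch_table, and the volatile variant of an entry is the sync placed one
// instruction before the non-volatile entry point:
//   address entry = branch_table[tos_state];
//   if (is_volatile) entry -= BytesPerInstWord;  // back up onto the preceding sync
//   jump to entry;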
#ifdef ASSERT
__ cmpdi(CCR0, Rflags, atos);
__ asm_assert_eq("what type is this?", 0x432);
__ bind(LFlagInvalid);
__ stop("got invalid flag", 0x6541);
bool all_uninitialized = true,
all_initialized = true;
for (int i = 0; i<number_of_states; ++i) {
all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
all_initialized = all_initialized && (branch_table[i] != NULL);
}
assert(all_uninitialized != all_initialized, "consistency"); // either or
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
__ stop("unexpected type", 0x6551);
#endif
// fallthru: __ bind(Latos);
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rthis);
if (branch_table[itos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[itos] = __ pc(); // non-volatile_entry point
__ lwax(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[ltos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[ltos] = __ pc(); // non-volatile_entry point
__ ldx(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[btos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[btos] = __ pc(); // non-volatile_entry point
__ lbzx(R3_RET, Rclass_or_obj, Roffset);
__ extsb(R3_RET, R3_RET);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[ctos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[ctos] = __ pc(); // non-volatile_entry point
__ lhzx(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[stos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[stos] = __ pc(); // non-volatile_entry point
__ lhax(R3_RET, Rclass_or_obj, Roffset);
__ beq(CCR6, Lacquire);
__ blr();
}
if (branch_table[atos] == 0) { // generate only once
__ align(32, 28, 28); // align load
__ sync(); // volatile entry point (one instruction before non-volatile_entry point)
branch_table[atos] = __ pc(); // non-volatile_entry point
__ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
__ verify_oop(R3_RET);
//__ dcbt(R3_RET); // prefetch
__ beq(CCR6, Lacquire);
__ blr();
}
__ align(32, 12);
__ bind(Lacquire);
__ twi_0(R3_RET);
__ isync(); // acquire
__ blr();
__ bind(Litos);
__ lwax(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lltos);
__ ldx(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lbtos);
__ lbzx(R3_RET, Rthis, Roffset);
__ extsb(R3_RET, R3_RET);
__ blr();
__ bind(Lctos);
__ lhzx(R3_RET, Rthis, Roffset);
__ blr();
__ bind(Lstos);
__ lhax(R3_RET, Rthis, Roffset);
__ blr();
#ifdef ASSERT
for (int i = 0; i<number_of_states; ++i) {
assert(branch_table[i], "accessor_entry initialization");
//tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
}
#endif
__ bind(Lslow_path);
assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now");
@ -670,18 +739,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// continue and the thread will safepoint at the next bytecode dispatch.
// If the receiver is null then it is OK to jump to the slow path.
#ifdef CC_INTERP
__ ld(R3_RET, Interpreter::stackElementSize, R17_tos); // get receiver
#else
Unimplemented();
#endif
__ ld(R3_RET, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp)); // get receiver
// Check if receiver == NULL and go the slow path.
__ cmpdi(CCR0, R3_RET, 0);
__ beq(CCR0, slow_path);
// Load the value of the referent field.
__ load_heap_oop_not_null(R3_RET, referent_offset, R3_RET);
__ load_heap_oop(R3_RET, referent_offset, R3_RET);
// Generate the G1 pre-barrier code to log the value of
// the referent field in an SATB buffer. Note with

View File

@ -40,8 +40,10 @@
#define JNIIMPORT
#endif
#define JNICALL
typedef int jint;
#define JNICALL
typedef int jint;
#if defined(_LP64)
typedef long jlong;
#else

View File

@ -97,8 +97,10 @@ void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Re
}
}
void MacroAssembler::align(int modulus) {
while (offset() % modulus != 0) nop();
void MacroAssembler::align(int modulus, int max, int rem) {
int padding = (rem + modulus - (offset() % modulus)) % modulus;
if (padding > max) return;
for (int c = (padding >> 2); c > 0; --c) { nop(); }
}
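A standalone sketch (hypothetical helper) of the padding computation, with two sample evaluations:

static int align_padding(int offset, int modulus, int rem) {
  return (rem + modulus - (offset % modulus)) % modulus;  // bytes of nops, skipped if > max
}
// align_padding(20, 32, 0)  == 12 -> three nops reach the next 32-byte boundary
// align_padding(20, 32, 28) ==  8 -> afterwards offset % 32 == 28, e.g. so that a
//                                    following 4-byte sync ends exactly at a 32-byte boundary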
// Issue instructions that calculate given TOC from global TOC.
@ -186,16 +188,25 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
#ifdef _LP64
// Patch compressed oops or klass constants.
// Assembler sequence is
// 1) compressed oops:
// lis rx = const.hi
// ori rx = rx | const.lo
// 2) compressed klass:
// lis rx = const.hi
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// The clrldi is skipped over when searching for the preceding lis.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
@ -210,8 +221,9 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
int xc = (data >> 16) & 0xffff;
int xd = (data >> 0) & 0xffff;
set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (short)(xd));
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
}
@ -222,10 +234,10 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding lis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
@ -238,8 +250,9 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
}
assert(inst1_found, "inst is not lis");
uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
return (int) (xl | xh);
}
#endif // _LP64
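A minimal standalone sketch of the hi/lo split behind the lis/ori patching above (with ori, unlike the former addi-based sequence, no carry correction of the high half is needed):

#include <cstdint>

static void split_narrow_oop(uint32_t data, uint16_t* hi, uint16_t* lo) {
  *hi = (uint16_t)(data >> 16);     // immediate of the lis
  *lo = (uint16_t)(data & 0xffff);  // immediate of the ori
}

static uint32_t reassemble_narrow_oop(uint16_t hi_imm, uint16_t lo_imm) {
  return ((uint32_t)hi_imm << 16) | lo_imm;  // what get_narrow_oop recomputes
}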
@ -252,13 +265,10 @@ void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
// FIXME: We should insert relocation information for oops at the constant
// pool entries instead of inserting it at the loads; patching of a constant
// pool entry should be less expensive.
Unimplemented();
if (false) {
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(oop_address - code()->consts()->start());
}
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(oop_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, true);
}
@ -532,7 +542,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
masm.b(dest);
}
}
ICache::invalidate_range(instruction_addr, code_size);
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
@ -673,7 +683,7 @@ void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, ad
CodeBuffer buf(instruction_addr, code_size);
MacroAssembler masm(&buf);
masm.bxx64_patchable(dest, relocInfo::none, link);
ICache::invalidate_range(instruction_addr, code_size);
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Get dest address of a bxx64_patchable instruction.
@ -964,6 +974,14 @@ address MacroAssembler::call_c(Register fd) {
/*load env=*/true);
}
address MacroAssembler::call_c_and_return_to_caller(Register fd) {
return branch_to(fd, /*and_link=*/false,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/true);
}
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
if (rt != relocInfo::none) {
// this call needs to be relocatable
@ -2315,7 +2333,7 @@ void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Ja
if (last_Java_pc != noreg)
std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
// set last_Java_sp last
// Set last_Java_sp last.
std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
}
@ -2454,6 +2472,57 @@ void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
}
}
// Clear Array
// Kills both input registers. tmp == R0 is allowed.
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
// Procedure for large arrays (uses data cache block zero instruction).
Label startloop, fast, fastloop, small_rest, restloop, done;
const int cl_size = VM_Version::get_cache_line_size(),
cl_dwords = cl_size>>3,
cl_dw_addr_bits = exact_log2(cl_dwords),
dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
//2:
cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
blt(CCR1, small_rest); // Too small.
rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
beq(CCR0, fast); // Already 128byte aligned.
subfic(tmp, tmp, cl_dwords);
mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
subf(cnt_dwords, tmp, cnt_dwords); // rest.
li(tmp, 0);
//10:
bind(startloop); // Clear at the beginning to reach 128byte boundary.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(startloop);
//13:
bind(fast); // Clear 128byte blocks.
srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
mtctr(tmp); // Load counter.
//16:
bind(fastloop);
dcbz(base_ptr); // Clear 128byte aligned block.
addi(base_ptr, base_ptr, cl_size);
bdnz(fastloop);
if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
//20:
bind(small_rest);
cmpdi(CCR0, cnt_dwords, 0); // size 0?
beq(CCR0, done); // rest == 0
li(tmp, 0);
mtctr(cnt_dwords); // Load counter.
//24:
bind(restloop); // Clear rest.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(restloop);
//27:
bind(done);
}
/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
// Search for a single jchar in an jchar[].
@ -2926,12 +2995,11 @@ void MacroAssembler::verify_oop(Register oop, const char* msg) {
if (!VerifyOops) {
return;
}
// will be preserved.
// Will be preserved.
Register tmp = R11;
assert(oop != tmp, "precondition");
unsigned int nbytes_save = 10*8; // 10 volatile gprs
address/* FunctionDescriptor** */fd =
StubRoutines::verify_oop_subroutine_entry_address();
address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
// save tmp
mr(R0, tmp);
// kill tmp

View File

@ -58,9 +58,24 @@ class MacroAssembler: public Assembler {
// Move register if destination register and target register are different
inline void mr_if_needed(Register rd, Register rs);
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
// This is dedicated for emitting scheduled mach nodes. For better
// readability of the ad file I put it here.
// Endgroups are not needed if
// - the scheduler is off
// - the scheduler found that there is a natural group end, in that
// case it reduced the size of the instruction used in the test
// yielding 'needed'.
inline void endgroup_if_needed(bool needed);
// Memory barriers.
inline void membar(int bits);
inline void release();
inline void acquire();
inline void fence();
// nop padding
void align(int modulus);
void align(int modulus, int max = 252, int rem = 0);
//
// Constants, loading constants, TOC support
@ -295,6 +310,8 @@ class MacroAssembler: public Assembler {
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address call_c(Register function_descriptor);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_descriptor);
address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
Register toc);
@ -320,7 +337,7 @@ class MacroAssembler: public Assembler {
// the entry point
address entry_point,
// flag which indicates if exception should be checked
bool check_exception=true
bool check_exception = true
);
// Support for VM calls. This is the base routine called by the
@ -530,9 +547,7 @@ class MacroAssembler: public Assembler {
inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
// Check accessed object for null. Use SIGTRAP-based null checks on AIX.
inline void ld_with_trap_null_check(Register d, int si16, Register s1);
// Variant for heap OOPs including decompression of compressed OOPs.
inline void load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant offs, Register s1);
inline void load_with_trap_null_check(Register d, int si16, Register s1);
// Load heap oop and decompress. Loaded oop may not be null.
inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg);
@ -584,6 +599,8 @@ class MacroAssembler: public Assembler {
is_trap_range_check_g(x) || is_trap_range_check_ge(x);
}
void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
// Needle of length 1.
void string_indexof_1(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar,
@ -630,7 +647,7 @@ class MacroAssembler: public Assembler {
// TODO: verify method and klass metadata (compare against vptr?)
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

View File

@ -58,8 +58,25 @@ inline void MacroAssembler::round_to(Register r, int modulus) {
// Move register if destination register and target register are different.
inline void MacroAssembler::mr_if_needed(Register rd, Register rs) {
if(rs !=rd) mr(rd, rs);
if (rs != rd) mr(rd, rs);
}
inline void MacroAssembler::fmr_if_needed(FloatRegister rd, FloatRegister rs) {
if (rs != rd) fmr(rd, rs);
}
inline void MacroAssembler::endgroup_if_needed(bool needed) {
if (needed) {
endgroup();
}
}
inline void MacroAssembler::membar(int bits) {
// TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
// (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
if (bits & StoreLoad) sync(); else lwsync();
}
inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
inline void MacroAssembler::fence() { membar(LoadLoad | LoadStore | StoreLoad | StoreStore); }
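// Resulting mapping: release() -> lwsync (LoadStore | StoreStore),
//                    acquire() -> lwsync (LoadLoad  | LoadStore),
//                    fence()   -> sync   (mask includes StoreLoad).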
// Address of the global TOC.
inline address MacroAssembler::global_toc() {
@ -117,13 +134,12 @@ inline bool MacroAssembler::is_calculate_address_from_global_toc_at(address a, a
inline bool MacroAssembler::is_set_narrow_oop(address a, address bound) {
const address inst2_addr = a;
const int inst2 = *(int *)a;
// The relocation points to the second instruction, the ori.
if (!is_ori(inst2)) return false;
// The relocation points to the second instruction, the addi.
if (!is_addi(inst2)) return false;
// The addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
if (inv_ra_field(inst2) != dst) return false;
// The ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
if (inv_rs_field(inst2) != dst) return false;
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
@ -266,9 +282,10 @@ inline void MacroAssembler::trap_ic_miss_check(Register a, Register b) {
// Do an explicit null check if access to a+offset will not raise a SIGSEGV.
// Either issue a trap instruction that raises SIGTRAP, or do a compare that
// branches to exception_entry.
// No support for compressed oops (base page of heap). Does not distinguish
// No support for compressed oops (base page of heap). Does not distinguish
// loads and stores.
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg, address exception_entry) {
inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg,
address exception_entry) {
if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
assert(UseSIGTRAP, "sanity");
@ -285,7 +302,7 @@ inline void MacroAssembler::null_check_throw(Register a, int offset, Register te
}
}
inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Register s1) {
inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
if (!os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(s1);
@ -294,17 +311,6 @@ inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Regist
ld(d, si16, s1);
}
// Attention: No null check for loaded uncompressed OOP. Can be used for loading klass field.
inline void MacroAssembler::load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant si16,
Register s1) {
if ( !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(s1);
}
}
load_heap_oop_not_null(d, si16, s1);
}
inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1) {
if (UseCompressedOops) {
lwz(d, offs, s1);


@ -31,12 +31,16 @@
#define __ _masm->
#ifdef CC_INTERP
#define EXCEPTION_ENTRY StubRoutines::throw_NullPointerException_at_call_entry()
#else
#define EXCEPTION_ENTRY Interpreter::throw_NullPointerException_entry()
#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#define STOP(error) block_comment(error); __ stop(error)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
@ -167,7 +171,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
sizeof(u2), /*is_signed*/ false);
// assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), "");
Label L;
__ ld(temp2, __ argument_offset(temp2, temp2, 0), R17_tos);
__ ld(temp2, __ argument_offset(temp2, temp2, 0), CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
__ cmpd(CCR1, temp2, recv);
__ beq(CCR1, L);
__ stop("receiver not on stack");
@ -194,7 +198,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
return NULL;
}
Register argbase = R17_tos; // parameter (preserved)
Register argbase = CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp); // parameter (preserved)
Register argslot = R3;
Register temp1 = R6;
Register param_size = R7;
@ -271,7 +275,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
Register member_reg,
bool for_compiler_entry) {
assert(is_signature_polymorphic(iid), "expected invoke iid");
Register temp1 = (for_compiler_entry ? R21_tmp1 : R7);
Register temp1 = (for_compiler_entry ? R25_tmp5 : R7);
Register temp2 = (for_compiler_entry ? R22_tmp2 : R8);
Register temp3 = (for_compiler_entry ? R23_tmp3 : R9);
Register temp4 = (for_compiler_entry ? R24_tmp4 : R10);
@ -295,11 +299,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
__ verify_oop(receiver_reg);
if (iid == vmIntrinsics::_linkToSpecial) {
// Don't actually load the klass; just null-check the receiver.
__ null_check_throw(receiver_reg, 0, temp1, StubRoutines::throw_NullPointerException_at_call_entry());
__ null_check_throw(receiver_reg, -1, temp1, EXCEPTION_ENTRY);
} else {
// load receiver klass itself
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(),
temp1, StubRoutines::throw_NullPointerException_at_call_entry());
__ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(), temp1, EXCEPTION_ENTRY);
__ load_klass(temp1_recv_klass, receiver_reg);
__ verify_klass_ptr(temp1_recv_klass);
}
@ -451,7 +454,7 @@ void trace_method_handle_stub(const char* adaptername,
if (Verbose) {
tty->print_cr("Registers:");
const int abi_offset = frame::abi_112_size / 8;
for (int i = R3->encoding(); i <= R13->encoding(); i++) {
for (int i = R3->encoding(); i <= R12->encoding(); i++) {
Register r = as_Register(i);
int count = i - R3->encoding();
// The registers are stored in reverse order on the stack (by save_volatile_gprs(R1_SP, abi_112_size)).
@ -490,7 +493,7 @@ void trace_method_handle_stub(const char* adaptername,
trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
}
// safely create a frame and call frame::describe
// Safely create a frame and call frame::describe.
intptr_t *dump_sp = trace_calling_frame.sender_sp();
frame dump_frame = frame(dump_sp);
@ -531,7 +534,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt
__ mr(R6_ARG4, R1_SP);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
__ restore_volatile_gprs(R1_SP, 112); // except R0
__ restore_volatile_gprs(R1_SP, 112); // Except R0.
__ pop_frame();
__ restore_LR_CR(R0);


@ -118,7 +118,7 @@ void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
a->bl(trampoline_stub_addr);
}
ICache::invalidate_range(addr_call, code_size);
ICache::ppc64_flush_icache_bytes(addr_call, code_size);
}
address NativeCall::get_trampoline() {
@ -182,11 +182,13 @@ address NativeMovConstReg::next_instruction_address() const {
intptr_t NativeMovConstReg::data() const {
address addr = addr_at(0);
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_load_const_at(addr)) {
return MacroAssembler::get_const(addr);
} else if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
}
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
narrowOop no = (narrowOop)MacroAssembler::get_narrow_oop(addr, cb->content_begin());
return cast_from_oop<intptr_t>(oopDesc::decode_heap_oop(no));
} else {
@ -213,19 +215,24 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
} else if (cb != NULL &&
MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) {
// A calculation relative to the global TOC.
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::invalidate_range(start, range);
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
// A normal 5 instruction load_const code sequence.
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
MacroAssembler::patch_const(addr, (long)data);
ICache::invalidate_range(addr, load_const_instruction_size);
if (MacroAssembler::get_const(addr) != (long)data) {
// This is not mt safe, ok in methods like CodeBuffer::copy_code().
MacroAssembler::patch_const(addr, (long)data);
ICache::ppc64_flush_icache_bytes(addr, load_const_instruction_size);
}
next_address = addr + 5 * BytesPerInstWord;
} else if (MacroAssembler::is_bl(* (int*) addr)) {
// A single branch-and-link instruction.
@ -234,7 +241,7 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
CodeBuffer cb(addr, code_size + 1);
MacroAssembler* a = new MacroAssembler(&cb);
a->bl((address) data);
ICache::invalidate_range(addr, code_size);
ICache::ppc64_flush_icache_bytes(addr, code_size);
next_address = addr + code_size;
} else {
ShouldNotReachHere();
@ -279,12 +286,13 @@ void NativeMovConstReg::set_data(intptr_t data) {
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
address addr = addr_at(0);
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
const int invalidated_range =
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::invalidate_range(start, range);
ICache::ppc64_flush_icache_bytes(start, range);
}
// Do not use an assertion here. Let clients decide whether they only
@ -292,15 +300,16 @@ void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL
#ifdef ASSERT
void NativeMovConstReg::verify() {
address addr = addr_at(0);
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
if (! MacroAssembler::is_load_const_at(addr) &&
! MacroAssembler::is_load_const_from_method_toc_at(addr) &&
! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
! MacroAssembler::is_bl(*((int*) addr))) {
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
// TODO: PPC port Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
! MacroAssembler::is_load_const_from_method_toc_at(addr)) {
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
if (! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
! MacroAssembler::is_bl(*((int*) addr))) {
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
}
}
}
#endif // ASSERT
@ -326,7 +335,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add
a->illtrap();
}
}
ICache::invalidate_range(verified_entry, code_size);
ICache::ppc64_flush_icache_bytes(verified_entry, code_size);
}
#ifdef ASSERT


@ -132,7 +132,7 @@ inline NativeInstruction* nativeInstruction_at(address address) {
class NativeCall: public NativeInstruction {
public:
enum specific_constants {
enum ppc_specific_constants {
load_const_instruction_size = 28,
load_const_from_method_toc_instruction_size = 16,
instruction_size = 16 // Used in shared code for calls with reloc_info.
@ -240,7 +240,7 @@ inline NativeFarCall* nativeFarCall_at(address instr) {
class NativeMovConstReg: public NativeInstruction {
public:
enum specific_constants {
enum ppc_specific_constants {
load_const_instruction_size = 20,
load_const_from_method_toc_instruction_size = 8,
instruction_size = 8 // Used in shared code for calls with reloc_info.
@ -279,7 +279,7 @@ class NativeJump: public NativeInstruction {
// We use MacroAssembler::b64_patchable() for implementing a
// jump-anywhere instruction.
enum specific_constants {
enum ppc_specific_constants {
instruction_size = MacroAssembler::b64_patchable_size
};
@ -384,7 +384,6 @@ class NativeCallTrampolineStub : public NativeInstruction {
void set_destination(address new_destination);
};
inline bool is_NativeCallTrampolineStub_at(address address) {
int first_instr = *(int*)address;
return Assembler::is_addis(first_instr) &&

hotspot/src/cpu/ppc/vm/ppc.ad (new file, 12059 lines; diff suppressed because it is too large)

@ -0,0 +1,24 @@
//
// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
// Copyright 2012, 2013 SAP AG. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//


@ -30,8 +30,8 @@
#include "asm/macroAssembler.hpp"
#include "asm/register.hpp"
#include "register_ppc.hpp"
#ifdef TARGET_ARCH_MODEL_32
# include "interp_masm_32.hpp"
#ifdef TARGET_ARCH_MODEL_ppc_32
# include "interp_masm_ppc_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc_64
# include "interp_masm_ppc_64.hpp"


@ -44,7 +44,7 @@ const char* RegisterImpl::name() const {
const char* ConditionRegisterImpl::name() const {
const char* names[number_of_registers] = {
"CR0", "CR1", "CR2", "CR3", "CCR4", "CCR5", "CCR6", "CCR7"
"CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7"
};
return is_valid() ? names[encoding()] : "cnoreg";
}
@ -61,7 +61,7 @@ const char* FloatRegisterImpl::name() const {
const char* SpecialRegisterImpl::name() const {
const char* names[number_of_registers] = {
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "R1_SPEFSCR", "SR_PPR"
"SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "SR_SPEFSCR", "SR_PPR"
};
return is_valid() ? names[encoding()] : "snoreg";
}


@ -60,8 +60,8 @@ typedef VMRegImpl* VMReg;
// FPSCR Floating point status and control register (volatile)
//
// CR0-CR1 Condition code fields (volatile)
// CR2-CCR4 Condition code fields (nonvolatile)
// CCR5-CCR7 Condition code fields (volatile)
// CR2-CR4 Condition code fields (nonvolatile)
// CR5-CR7 Condition code fields (volatile)
//
// ----------------------------------------------
// On processors with the VMX feature:
@ -531,7 +531,7 @@ REGISTER_DECLARATION(Register, R7_ARG5, R7); // volatile
REGISTER_DECLARATION(Register, R8_ARG6, R8); // volatile
REGISTER_DECLARATION(Register, R9_ARG7, R9); // volatile
REGISTER_DECLARATION(Register, R10_ARG8, R10); // volatile
REGISTER_DECLARATION(FloatRegister, FO_SCRATCH, F0); // volatile
REGISTER_DECLARATION(FloatRegister, F0_SCRATCH, F0); // volatile
REGISTER_DECLARATION(FloatRegister, F1_RET, F1); // volatile
REGISTER_DECLARATION(FloatRegister, F1_ARG1, F1); // volatile
REGISTER_DECLARATION(FloatRegister, F2_ARG2, F2); // volatile
@ -560,7 +560,7 @@ REGISTER_DECLARATION(FloatRegister, F13_ARG13, F13); // volatile
#define R8_ARG6 AS_REGISTER(Register, R8)
#define R9_ARG7 AS_REGISTER(Register, R9)
#define R10_ARG8 AS_REGISTER(Register, R10)
#define FO_SCRATCH AS_REGISTER(FloatRegister, F0)
#define F0_SCRATCH AS_REGISTER(FloatRegister, F0)
#define F1_RET AS_REGISTER(FloatRegister, F1)
#define F1_ARG1 AS_REGISTER(FloatRegister, F1)
#define F2_ARG2 AS_REGISTER(FloatRegister, F2)
@ -608,7 +608,6 @@ REGISTER_DECLARATION(Register, R26_tmp6, R26);
REGISTER_DECLARATION(Register, R27_tmp7, R27);
REGISTER_DECLARATION(Register, R28_tmp8, R28);
REGISTER_DECLARATION(Register, R29_tmp9, R29);
REGISTER_DECLARATION(Register, R30_polling_page, R30);
#ifndef DONT_USE_REGISTER_DEFINES
#define R21_tmp1 AS_REGISTER(Register, R21)
#define R22_tmp2 AS_REGISTER(Register, R22)
@ -619,7 +618,6 @@ REGISTER_DECLARATION(Register, R30_polling_page, R30);
#define R27_tmp7 AS_REGISTER(Register, R27)
#define R28_tmp8 AS_REGISTER(Register, R28)
#define R29_tmp9 AS_REGISTER(Register, R29)
#define R30_polling_page AS_REGISTER(Register, R30)
#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
#endif


@ -0,0 +1,183 @@
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#ifdef COMPILER2
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/vmreg.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_ppc.hpp"
#include "opto/runtime.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vmreg_ppc.inline.hpp"
#endif
#define __ masm->
#ifdef COMPILER2
// SP adjustment (must use unextended SP) for method handle call sites
// during exception handling.
static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
RegisterMap map(thread, false);
// The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
frame mh_caller_frame = thread->last_frame().sender(&map);
assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
return (intptr_t) mh_caller_frame.unextended_sp();
}
//------------------------------generate_exception_blob---------------------------
// Creates exception blob at the end.
// Using exception blob, this code is jumped from a compiled method.
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers) unwind the frame and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
// R3_ARG1: exception oop
// R4_ARG2: exception pc
//
// Results:
// R3_ARG1: exception oop
// R4_ARG2: exception pc in caller
// destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//
void OptoRuntime::generate_exception_blob() {
// Allocate space for the code.
ResourceMark rm;
// Setup code generation tools.
CodeBuffer buffer("exception_blob", 2048, 1024);
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
address start = __ pc();
int frame_size_in_bytes = frame::abi_112_size;
OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
// Exception pc is 'return address' for stack walker.
__ std(R4_ARG2/*exception pc*/, _abi(lr), R1_SP);
// Store the exception in the Thread object.
__ std(R3_ARG1/*exception oop*/, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ std(R4_ARG2/*exception pc*/, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
// Save callee-saved registers.
// Push a C frame for the exception blob. It is needed for the C call later on.
__ push_frame_abi112(0, R11_scratch1);
// This call does all the hard work. It checks if an exception handler
// exists in the method.
// If so, it returns the handler address.
// If not, it prepares for stack-unwinding, restoring the callee-save
// registers of the frame being removed.
__ set_last_Java_frame(/*sp=*/R1_SP, noreg);
__ mr(R3_ARG1, R16_thread);
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, OptoRuntime::handle_exception_C),
relocInfo::none);
address calls_return_pc = __ last_calls_return_pc();
# ifdef ASSERT
__ cmpdi(CCR0, R3_RET, 0);
__ asm_assert_ne("handle_exception_C must not return NULL", 0x601);
# endif
// Set an oopmap for the call site. This oopmap will only be used if we
// are unwinding the stack. Hence, all locations will be dead.
// Callee-saved registers will be the same as the frame above (i.e.,
// handle_exception_stub), since they were restored when we got the
// exception.
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(calls_return_pc - start, map);
// Get unextended_sp for method handle call sites.
Label mh_callsite, mh_done; // Use a 2nd c call if it's a method handle call site.
__ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
__ cmpwi(CCR0, R4_ARG2, 0);
__ bne(CCR0, mh_callsite);
__ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ pop_frame();
__ bind(mh_done);
// We have a handler in register SR_CTR (could be deopt blob).
// Get the exception oop.
__ ld(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
// Get the exception pc in case we are deoptimized.
__ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
// Reset thread values.
__ li(R0, 0);
#ifdef ASSERT
__ std(R0, in_bytes(JavaThread::exception_handler_pc_offset()), R16_thread);
__ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
#endif
// Clear the exception oop so GC no longer processes it as a root.
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
// Move exception pc into SR_LR.
__ mtlr(R4_ARG2);
__ bctr();
// Same as above, but also set sp to unextended_sp.
__ bind(mh_callsite);
__ mr(R31, R3_RET); // Save branch address.
__ mr(R3_ARG1, R16_thread);
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
// Returns unextended_sp in R3_RET.
__ mtctr(R31); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
__ b(mh_done);
// Make sure all code is generated.
masm->flush();
// Set exception blob.
_exception_blob = ExceptionBlob::create(&buffer, oop_maps,
frame_size_in_bytes/wordSize);
}
#endif // COMPILER2


@ -687,17 +687,9 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
F13->as_VMReg()
};
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
// The first 8 arguments are not passed on the stack.
const int num_args_in_regs = 8;
#define put_arg_in_reg(arg) ((arg) < num_args_in_regs)
// Check calling conventions consistency.
assert(num_iarg_registers == num_args_in_regs
&& num_iarg_registers == 8
&& num_farg_registers == 13,
assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
"consistency");
// `Stk' counts stack slots. Due to alignment, 32 bit values occupy
@ -705,8 +697,6 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats
const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
int ill_i = 0;
int ill_t = 0;
int i;
VMReg reg;
// Leave room for C-compatible ABI_112.
@ -726,6 +716,11 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
if (regs2 != NULL) regs2[i].set_bad();
switch(sig_bt[i]) {
//
// If arguments 0-7 are integers, they are passed in integer registers.
// Argument i is placed in iarg_reg[i].
//
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
@ -754,7 +749,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
case T_ADDRESS:
case T_METADATA:
// Oops are already boxed if required (JNI).
if (put_arg_in_reg(arg)) {
if (arg < Argument::n_int_register_parameters_c) {
reg = iarg_reg[arg];
} else {
reg = VMRegImpl::stack2reg(stk);
@ -762,57 +757,66 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
}
regs[i].set2(reg);
break;
//
// Floats are treated differently from int regs: The first 13 float arguments
// are passed in registers (not the float args among the first 13 args).
// Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
// in farg_reg[j] if argument i is the j-th float argument of this call.
//
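// Worked example (hypothetical C signature): f(i0 .. i7 /* 8 ints */, float f8, double d9).
// - i0 .. i7: arg < n_int_register_parameters_c, so each goes into iarg_reg[0..7] (R3..R10).
// - f8: first float argument of the call, so freg == 0 and it is passed in farg_reg[0]
//   (F1 on this ABI); because its argument index 8 >= n_regs_not_on_stack_c it also
//   gets a stack slot via regs2.
// - d9: second float argument, so farg_reg[1] (F2), plus a stack slot for the same reason.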
case T_FLOAT:
if (put_arg_in_reg(arg)) {
if (freg < Argument::n_float_register_parameters_c) {
// Put float in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a float arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
regs2[i].set1(reg2);
stk += inc_stk_for_intfloat;
}
} else {
// Put float on stack
# if defined(LINUX)
reg = VMRegImpl::stack2reg(stk+1);
# elif defined(AIX)
reg = VMRegImpl::stack2reg(stk);
# else
# error "unknown OS"
# endif
// Put float on stack.
reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
stk += inc_stk_for_intfloat;
}
if (freg < num_farg_registers) {
// There are still some float argument registers left. Put the
// float in a register if not already done.
if (reg != farg_reg[freg]) {
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = farg_reg[freg];
regs2[i].set1(reg2);
}
++freg;
}
regs[i].set1(reg);
break;
case T_DOUBLE:
assert(sig_bt[i+1] == T_VOID, "expecting half");
if (put_arg_in_reg(arg)) {
if (freg < Argument::n_float_register_parameters_c) {
// Put double in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a double arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk);
regs2[i].set2(reg2);
stk += inc_stk_for_longdouble;
}
} else {
// Put double on stack.
reg = VMRegImpl::stack2reg(stk);
stk += inc_stk_for_longdouble;
}
if (freg < num_farg_registers) {
// There are still some float argument registers left. Put the
// float in a register if not already done.
if (reg != farg_reg[freg]) {
guarantee(regs2 != NULL, "must pass float in register and stack slot");
VMReg reg2 = farg_reg[freg];
regs2[i].set2(reg2);
}
++freg;
}
regs[i].set2(reg);
break;
case T_VOID:
// Do not count halves.
regs[i].set_bad();
@ -877,7 +881,7 @@ static address gen_c2i_adapter(MacroAssembler *masm,
__ mtlr(return_pc);
// call the interpreter
// Call the interpreter.
__ BIND(call_interpreter);
__ mtctr(ientry);
@ -947,8 +951,12 @@ static address gen_c2i_adapter(MacroAssembler *masm,
// Jump to the interpreter just as if interpreter was doing it.
#ifdef CC_INTERP
const Register tos = R17_tos;
#endif
// load TOS
__ addi(R17_tos, R1_SP, st_off);
__ addi(tos, R1_SP, st_off);
// Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
@ -982,7 +990,9 @@ static void gen_i2c_adapter(MacroAssembler *masm,
// save code can segv when fxsave instructions find improperly
// aligned stack pointer.
#ifdef CC_INTERP
const Register ld_ptr = R17_tos;
#endif
const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
const int num_value_regs = sizeof(value_regs) / sizeof(Register);
int value_regs_index = 0;
@ -1137,7 +1147,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ bne_predict_taken(CCR0, valid);
// We have a null argument, branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
relocInfo::runtime_call_type);
__ BIND(valid);
}
}
@ -1154,7 +1164,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ beq_predict_taken(CCR0, valid);
// We have an unexpected klass, branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
relocInfo::runtime_call_type);
__ BIND(valid);
}
@ -1170,8 +1180,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
__ beq_predict_taken(CCR0, call_interpreter);
// Branch to ic_miss_stub.
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
relocInfo::runtime_call_type);
__ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
// entry: c2i
@ -2594,7 +2603,11 @@ static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
__ ld(frame_size_reg, 0, frame_sizes_reg);
__ std(pc_reg, _abi(lr), R1_SP);
__ push_frame(frame_size_reg, R0/*tmp*/);
#ifdef CC_INTERP
__ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
#else
Unimplemented();
#endif
__ addi(number_of_frames_reg, number_of_frames_reg, -1);
__ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
__ addi(pcs_reg, pcs_reg, wordSize);
@ -2693,7 +2706,9 @@ static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
// Store it in the top interpreter frame.
__ std(R0, _abi(lr), R1_SP);
// Initialize frame_manager_lr of interpreter top frame.
#ifdef CC_INTERP
__ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
#endif
}
#endif
@ -2886,8 +2901,7 @@ void SharedRuntime::generate_deopt_blob() {
// Initialize R14_state.
__ ld(R14_state, 0, R1_SP);
__ addi(R14_state, R14_state,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
// Also initialize R15_prev_state.
__ restore_prev_state();
@ -3010,8 +3024,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
// Initialize R14_state, ...
__ ld(R11_scratch1, 0, R1_SP);
__ addi(R14_state, R11_scratch1,
-frame::interpreter_frame_cinterpreterstate_size_in_bytes());
__ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
// also initialize R15_prev_state.
__ restore_prev_state();
// Return to the interpreter entry point.


@ -146,14 +146,14 @@ class StubGenerator: public StubCodeGenerator {
// FIXME: use round_to() here
__ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1);
__ sldi(r_frame_alignment_in_bytes,
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
// size = unaligned size of arguments + top abi's size
__ addi(r_frame_size, r_argument_size_in_bytes,
frame::top_ijava_frame_abi_size);
// size += arguments alignment
__ add(r_frame_size,
r_frame_size, r_frame_alignment_in_bytes);
r_frame_size, r_frame_alignment_in_bytes);
// size += size of call_stub locals
__ addi(r_frame_size,
r_frame_size, frame::entry_frame_locals_size);
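// Worked example (hypothetical, assuming 8-byte stack elements): for 3 Java arguments the
// argument area is 24 bytes; the odd count adds 8 bytes of alignment, so
//   r_frame_size = 24 + 8 + frame::top_ijava_frame_abi_size + frame::entry_frame_locals_size,
// with the extra 8 bytes keeping the copied argument area 16-byte aligned.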
@ -179,7 +179,7 @@ class StubGenerator: public StubCodeGenerator {
__ addi(r_top_of_arguments_addr,
R1_SP, frame::top_ijava_frame_abi_size);
__ add(r_top_of_arguments_addr,
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
r_top_of_arguments_addr, r_frame_alignment_in_bytes);
// any arguments to copy?
__ cmpdi(CCR0, r_arg_argument_count, 0);
@ -229,22 +229,23 @@ class StubGenerator: public StubCodeGenerator {
// Register state on entry to frame manager / native entry:
//
// R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
// tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
// R19_method - Method
// R16_thread - JavaThread*
// R17_tos must point to last argument - element_size.
__ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
// Tos must point to last argument - element_size.
const Register tos = R17_tos;
__ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
// initialize call_stub locals (step 2)
// now save R17_tos as arguments_tos_address
__ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
// now save tos as arguments_tos_address
__ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
// load argument registers for call
__ mr(R19_method, r_arg_method);
__ mr(R16_thread, r_arg_thread);
assert(R17_tos != r_arg_method, "trashed r_arg_method");
assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
assert(tos != r_arg_method, "trashed r_arg_method");
assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
// Set R15_prev_state to 0 for simplifying checks in callee.
__ li(R15_prev_state, 0);
@ -274,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
// Do a light-weight C-call here, r_new_arg_entry holds the address
// of the interpreter entry point (frame manager or native entry)
// and save runtime-value of LR in return_address.
assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
"trashed r_new_arg_entry");
return_address = __ call_stub(r_new_arg_entry);
}
@ -326,8 +327,8 @@ class StubGenerator: public StubCodeGenerator {
// T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT.
__ cmpwi(CCR0, r_arg_result_type, T_OBJECT);
__ cmpwi(CCR1, r_arg_result_type, T_LONG);
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
__ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
__ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
// restore non-volatile registers
__ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
@ -345,8 +346,8 @@ class StubGenerator: public StubCodeGenerator {
__ beq(CCR0, ret_is_object);
__ beq(CCR1, ret_is_long);
__ beq(CCR5, ret_is_float);
__ beq(CCR6, ret_is_double);
__ beq(CCR5, ret_is_float);
__ beq(CCR6, ret_is_double);
// default:
__ stw(R3_RET, 0, r_arg_result_addr);
@ -614,6 +615,17 @@ class StubGenerator: public StubCodeGenerator {
if (!dest_uninitialized) {
const int spill_slots = 4 * wordSize;
const int frame_size = frame::abi_112_size + spill_slots;
Label filtered;
// Is marking active?
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
__ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
__ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
}
__ cmpdi(CCR0, Rtmp1, 0);
__ beq(CCR0, filtered);
__ save_LR_CR(R0);
__ push_frame_abi112(spill_slots, R0);
@ -628,6 +640,8 @@ class StubGenerator: public StubCodeGenerator {
__ ld(count, frame_size - 3 * wordSize, R1_SP);
__ pop_frame();
__ restore_LR_CR(R0);
__ bind(filtered);
}
break;
case BarrierSet::CardTableModRef:
@ -648,21 +662,28 @@ class StubGenerator: public StubCodeGenerator {
//
// The input registers and R0 are overwritten.
//
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
BarrierSet* const bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ save_LR_CR(R0);
// We need this frame only that the callee can spill LR/CR.
__ push_frame_abi112(0, R0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
__ pop_frame();
__ restore_LR_CR(R0);
if (branchToEnd) {
__ save_LR_CR(R0);
// We need this frame only to spill LR.
__ push_frame_abi112(0, R0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
__ pop_frame();
__ restore_LR_CR(R0);
} else {
// Tail call: fake call from stub caller by branching without linking.
address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
__ mr_if_needed(R3_ARG1, addr);
__ mr_if_needed(R4_ARG2, count);
__ load_const(R11, entry_point, R0);
__ call_c_and_return_to_caller(R11);
}
}
break;
case BarrierSet::CardTableModRef:
@ -697,9 +718,12 @@ class StubGenerator: public StubCodeGenerator {
__ addi(addr, addr, 1);
__ bdnz(Lstore_loop);
__ bind(Lskip_loop);
if (!branchToEnd) __ blr();
}
break;
case BarrierSet::ModRef:
if (!branchToEnd) __ blr();
break;
default:
ShouldNotReachHere();
@ -847,30 +871,28 @@ class StubGenerator: public StubCodeGenerator {
// The code is implemented (ported from sparc) as we believe it benefits JVM98, however
// tracing (-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all!
//
// Source code in function is_range_check_if() shows OptimizeFill relaxed the condition
// Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition
// for turning on loop predication optimization, and hence the behavior of "array range check"
// and "loop invariant check" could be influenced, which potentially boosted JVM98.
//
// We leave the code here and see if Oracle has updates in later releases (later than HS20).
//
// Generate stub for disjoint short fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
// Generate stub for disjoint short fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
// to: R3_ARG1
// value: R4_ARG2
// count: R5_ARG3 treated as signed
// to: R3_ARG1
// value: R4_ARG2
// count: R5_ARG3 treated as signed
//
address generate_fill(BasicType t, bool aligned, const char* name) {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ emit_fd();
const Register to = R3_ARG1; // source array address
const Register value = R4_ARG2; // fill value
const Register count = R5_ARG3; // elements count
const Register temp = R6_ARG4; // temp register
const Register to = R3_ARG1; // source array address
const Register value = R4_ARG2; // fill value
const Register count = R5_ARG3; // elements count
const Register temp = R6_ARG4; // temp register
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
//assert_clean_int(count, O3); // Make sure 'count' is clean int.
Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
@ -879,31 +901,31 @@ class StubGenerator: public StubCodeGenerator {
switch (t) {
case T_BYTE:
shift = 2;
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
__ rldimi(value, value, 8, 48); // 8 bit -> 16 bit
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_elements);
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
break;
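// Example of the replication above (hypothetical fill value 0x41):
//   after rldimi(value, value, 8, 48):  value = 0x4141       (byte -> halfword)
//   after rldimi(value, value, 16, 32): value = 0x41414141   (halfword -> word)
// The later rldimi(value, value, 32, 0) in the 32-byte loop widens this to
// 0x4141414141414141 so whole doublewords can be stored with std.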
case T_SHORT:
shift = 1;
// clone bytes (zero extend not needed because store instructions below ignore high order bytes)
// Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
__ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_elements);
break;
case T_INT:
shift = 0;
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element
__ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
__ blt(CCR0, L_fill_4_bytes);
break;
default: ShouldNotReachHere();
}
if (!aligned && (t == T_BYTE || t == T_SHORT)) {
// align source address at 4 bytes address boundary
// Align source address at 4 bytes address boundary.
if (t == T_BYTE) {
// One byte misalignment happens only for byte arrays
// One byte misalignment happens only for byte arrays.
__ andi_(temp, to, 1);
__ beq(CCR0, L_skip_align1);
__ stb(value, 0, to);
@ -930,12 +952,12 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L_fill_32_bytes);
}
__ li(temp, 8<<shift); // prepare for 32 byte loop
// clone bytes int->long as above
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
__ li(temp, 8<<shift); // Prepare for 32 byte loop.
// Clone bytes int->long as above.
__ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
Label L_check_fill_8_bytes;
// Fill 32-byte chunks
// Fill 32-byte chunks.
__ subf_(count, temp, count);
__ blt(CCR0, L_check_fill_8_bytes);
@ -945,7 +967,7 @@ class StubGenerator: public StubCodeGenerator {
__ std(value, 0, to);
__ std(value, 8, to);
__ subf_(count, temp, count); // update count
__ subf_(count, temp, count); // Update count.
__ std(value, 16, to);
__ std(value, 24, to);
@ -968,7 +990,7 @@ class StubGenerator: public StubCodeGenerator {
__ addi(to, to, 8);
__ bge(CCR0, L_fill_8_bytes_loop);
// fill trailing 4 bytes
// Fill trailing 4 bytes.
__ bind(L_fill_4_bytes);
__ andi_(temp, count, 1<<shift);
__ beq(CCR0, L_fill_2_bytes);
@ -976,14 +998,14 @@ class StubGenerator: public StubCodeGenerator {
__ stw(value, 0, to);
if (t == T_BYTE || t == T_SHORT) {
__ addi(to, to, 4);
// fill trailing 2 bytes
// Fill trailing 2 bytes.
__ bind(L_fill_2_bytes);
__ andi_(temp, count, 1<<(shift-1));
__ beq(CCR0, L_fill_byte);
__ sth(value, 0, to);
if (t == T_BYTE) {
__ addi(to, to, 2);
// fill trailing byte
// Fill trailing byte.
__ bind(L_fill_byte);
__ andi_(count, count, 1);
__ beq(CCR0, L_exit);
@ -997,7 +1019,7 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L_exit);
__ blr();
// Handle copies less than 8 bytes. Int is handled elsewhere.
// Handle copies less than 8 bytes. Int is handled elsewhere.
if (t == T_BYTE) {
__ bind(L_fill_elements);
Label L_fill_2, L_fill_4;
@ -1039,7 +1061,7 @@ class StubGenerator: public StubCodeGenerator {
}
// Generate overlap test for array copy stubs
// Generate overlap test for array copy stubs.
//
// Input:
// R3_ARG1 - from
@ -1873,10 +1895,7 @@ class StubGenerator: public StubCodeGenerator {
generate_conjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ blr();
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}
@ -1906,9 +1925,7 @@ class StubGenerator: public StubCodeGenerator {
generate_disjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ blr();
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}


@ -89,16 +89,17 @@ void VM_Version::initialize() {
}
// On Power6 test for section size.
if (PowerArchitecturePPC64 == 6)
if (PowerArchitecturePPC64 == 6) {
determine_section_size();
// TODO: PPC port else
// TODO: PPC port } else {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // max 16 chars per feature
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
@ -127,21 +128,21 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
if (AllocatePrefetchStyle == 4) {
AllocatePrefetchStepSize = cache_line_size; // need exact value
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ?
AllocatePrefetchStepSize = cache_line_size; // Need exact value.
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
} else {
if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ?
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
}
assert(AllocatePrefetchLines > 0, "invalid value");
if (AllocatePrefetchLines < 1) // Set valid value in product VM.
AllocatePrefetchLines = 1; // Conservative value
AllocatePrefetchLines = 1; // Conservative value.
if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
AllocatePrefetchStyle = 1; // fall back if inappropriate
AllocatePrefetchStyle = 1; // Fall back if inappropriate.
assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
}
@ -160,13 +161,13 @@ void VM_Version::determine_section_size() {
const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;
// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_section_size", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
uint32_t *code = (uint32_t *)a->pc();
// emit code.
// Emit code.
void (*test1)() = (void(*)())(void *)a->emit_fd();
Label l1;
@ -189,58 +190,58 @@ void VM_Version::determine_section_size() {
// ;; 1
a->nop(); // 5
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->fmr(F6, F6); // 6
a->fmr(F7, F7); // 7
a->endgroup(); // 8
// ------- sector 8 ------------
// ;; 2
a->nop(); // 9
a->nop(); // 10
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12
a->fmr(F8, F8); // 11
a->fmr(F9, F9); // 12
// ;; 3
a->nop(); // 13
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->fmr(F10, F10); // 14
a->fmr(F11, F11); // 15
a->endgroup(); // 16
// -------- sector 16 -------------
// ;; 4
a->nop(); // 17
a->nop(); // 18
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20
a->fmr(F15, F15); // 19
a->fmr(F16, F16); // 20
// ;; 5
a->nop(); // 21
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->fmr(F17, F17); // 22
a->fmr(F18, F18); // 23
a->endgroup(); // 24
// ------- sector 24 ------------
// ;; 6
a->nop(); // 25
a->nop(); // 26
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28
a->fmr(F19, F19); // 27
a->fmr(F20, F20); // 28
// ;; 7
a->nop(); // 29
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->fmr(F21, F21); // 30
a->fmr(F22, F22); // 31
a->brnop0(); // 32
// ------- sector 32 ------------
}
// ;; 8
a->cmpdi(CCR0, R4, unroll);// 33
a->bge(CCR0, l1); // 34
a->cmpdi(CCR0, R4, unroll); // 33
a->bge(CCR0, l1); // 34
a->blr();
// emit code.
// Emit code.
void (*test2)() = (void(*)())(void *)a->emit_fd();
// uint32_t *code = (uint32_t *)a->pc();
@ -382,39 +383,40 @@ void VM_Version::determine_section_size() {
#endif // COMPILER2
void VM_Version::determine_features() {
const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction)
// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (num_features+1+2*7)*BytesPerInstWord;
int features = 0;
// create test area
enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size)
enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
char test_area[BUFFER_SIZE];
char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
// Allocate space for the code
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_cpu_features", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
// emit code.
// Emit code.
void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd();
uint32_t *code = (uint32_t *)a->pc();
// Don't use R0 in ldarx.
// keep R3_ARG1 = R3 unmodified, it contains &field (see below)
// keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below)
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->isel(R7, R5, R6, 0); // code[1] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m
a->cmpb(R7, R5, R6); // code[3] -> bcmp
//a->mftgpr(R7, F3); // code[4] -> mftgpr
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
a->blr();
// Emit function to set one cache line to zero
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it
a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd();
a->dcbz(R3_ARG1); // R3_ARG1 = addr
a->blr();
uint32_t *code_end = (uint32_t *)a->pc();
@ -428,8 +430,8 @@ void VM_Version::determine_features() {
}
// Measure cache line size.
memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF
(*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle
memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
(*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
int count = 0; // count zeroed bytes
for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
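// Example: on a processor whose dcbz zeroes a 128-byte block, exactly 128 of the
// 0xFF-filled bytes read back as zero, so count == 128 and that power of two is
// taken as the measured data cache line size.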


@ -113,7 +113,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error
__ ld_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
@ -147,7 +147,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
// Entry arguments:
// R19_method: Interface
// R3_ARG1: Receiver
// R3_ARG1: Receiver
//
const Register rcvr_klass = R11_scratch1;


@ -34,114 +34,114 @@
//
// Machine barrier instructions:
//
// - ppc_sync Two-way memory barrier, aka fence.
// - ppc_lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - ppc_eieio orders Store|Store
// - ppc_isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - ppc_release orders Store|Store, (maps to ppc_lwsync)
// Load|Store
// - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
// Load|Load
// - ppc_fence orders Store|Store, (maps to ppc_sync)
// Load|Store,
// Load|Load,
// Store|Load
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//
#define inlasm_ppc_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_ppc_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_ppc_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_ppc_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_ppc_release() inlasm_ppc_lwsync();
#define inlasm_ppc_acquire() inlasm_ppc_lwsync();
#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");
#define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory");
#define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory");
#define inlasm_release() inlasm_lwsync();
#define inlasm_acquire() inlasm_lwsync();
// Use twi-isync for load_acquire (faster than lwsync).
// ATTENTION: seems like xlC 10.1 has problems with this inline assembler macro (VerifyMethodHandles found "bad vminfo in AMH.conv"):
// #define inlasm_ppc_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_ppc_acquire_reg(X) inlasm_ppc_lwsync();
#define inlasm_ppc_fence() inlasm_ppc_sync();
// #define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
#define inlasm_acquire_reg(X) inlasm_lwsync();
#define inlasm_fence() inlasm_sync();
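// Illustrative use of the primitives defined in this file (hypothetical example; g_data
// and g_ready are made-up globals, not part of the port): publishing a value to another
// thread with release/acquire semantics.
//
//   static volatile jint g_data  = 0;
//   static volatile jint g_ready = 0;
//
//   void publisher() {
//     g_data = 42;
//     OrderAccess::release_store(&g_ready, 1);              // lwsync, then the store
//   }
//
//   jint consumer() {
//     while (OrderAccess::load_acquire(&g_ready) == 0) { }  // load, then acquire barrier
//     return g_data;                                        // observes 42 once g_ready reads as 1
//   }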
inline void OrderAccess::loadload() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storestore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_ppc_lwsync(); }
inline void OrderAccess::storeload() { inlasm_ppc_fence(); }
inline void OrderAccess::loadload() { inlasm_lwsync(); }
inline void OrderAccess::storestore() { inlasm_lwsync(); }
inline void OrderAccess::loadstore() { inlasm_lwsync(); }
inline void OrderAccess::storeload() { inlasm_fence(); }
inline void OrderAccess::acquire() { inlasm_ppc_acquire(); }
inline void OrderAccess::release() { inlasm_ppc_release(); }
inline void OrderAccess::fence() { inlasm_ppc_fence(); }
inline void OrderAccess::acquire() { inlasm_acquire(); }
inline void OrderAccess::release() { inlasm_release(); }
inline void OrderAccess::fence() { inlasm_fence(); }
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_ppc_acquire_reg(t); return t; }
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; }
inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; }
inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; }
inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; }
inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_acquire_reg(t); return t; }
inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_acquire_reg(t); return t; }
inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_ppc_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_ppc_acquire(); return t; }
inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; }
inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; }
inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }
inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); }
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; }
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_ppc_release(); *p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_ppc_release(); *(void* volatile *)p = v; inlasm_ppc_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); }
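// Illustrative usage sketch (not part of this change; variable and function
// names are invented for the example): a release_store / load_acquire pairing
// that publishes a payload guarded by a flag, using the primitives above.
static volatile jint example_payload = 0;
static volatile jint example_ready   = 0;

static inline void example_publish(jint v) {
  example_payload = v;                            // plain store of the data
  OrderAccess::release_store(&example_ready, 1);  // lwsync, then store the flag
}

static inline jint example_consume() {
  if (OrderAccess::load_acquire(&example_ready)) {  // load flag, then acquire barrier
    return example_payload;                         // observes the published value
  }
  return 0;                                         // not published yet
}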
#undef inlasm_ppc_sync
#undef inlasm_ppc_lwsync
#undef inlasm_ppc_eieio
#undef inlasm_ppc_isync
#undef inlasm_ppc_release
#undef inlasm_ppc_acquire
#undef inlasm_ppc_fence
#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync
#undef inlasm_release
#undef inlasm_acquire
#undef inlasm_fence
#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP

@@ -67,7 +67,7 @@ address os::current_stack_pointer() {
address csp;
#if !defined(USE_XLC_BUILTINS)
// inline assembly for `ppc_mr regno(csp), PPC_SP':
// inline assembly for `mr regno(csp), R1_SP':
__asm__ __volatile__ ("mr %0, 1":"=r"(csp):);
#else
csp = (address) __builtin_frame_address(0);
@@ -263,7 +263,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
goto report_and_die;
} else {
// this means a segv happened inside our stack, but not in
// This means a segv happened inside our stack, but not in
// the guarded zone. I'd like to know when this happens,
tty->print_raw_cr("SIGSEGV happened inside stack but outside yellow and red zone.");
goto report_and_die;
@@ -312,53 +312,57 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}
// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
goto run_stub;
}
else if (sig == SIGSEGV && os::is_poll_address(addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
goto run_stub;
}
// SIGTRAP-based ic miss check in compiled code
// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
goto run_stub;
}
#ifdef COMPILER2
// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}
#endif
// SIGSEGV-based implicit null check in compiled code.
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -366,8 +370,9 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
goto run_stub;
}

@@ -58,35 +58,35 @@ inline jlong Atomic::load(volatile jlong* src) { return *src; }
- sync two-way memory barrier, aka fence
- lwsync orders Store|Store,
Load|Store,
Load|Load,
but not Store|Load
- eieio orders memory accesses for device memory (only)
- isync invalidates speculatively executed instructions
From the POWER ISA 2.06 documentation:
"[...] an isync instruction prevents the execution of
instructions following the isync until instructions
preceding the isync have completed, [...]"
From IBM's AIX assembler reference:
"The isync [...] instructions causes the processor to
refetch any instructions that might have been fetched
prior to the isync instruction. The instruction isync
causes the processor to wait for all previous instructions
to complete. Then any instructions already fetched are
discarded and instruction processing continues in the
environment established by the previous instructions."
semantic barrier instructions:
(as defined in orderAccess.hpp)
- release orders Store|Store, (maps to lwsync)
Load|Store
- acquire orders Load|Store, (maps to lwsync)
Load|Load
- fence orders Store|Store, (maps to sync)
Load|Store,
Load|Load,
Store|Load
*/
#define strasm_sync "\n sync \n"
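// Illustrative sketch (not part of this change; the macro and function names
// are invented for the example): the remaining barriers follow the same
// string-macro pattern as strasm_sync above and can be pasted straight into
// inline-assembly templates, e.g. an lwsync-based release barrier:
#define example_strasm_lwsync "\n  lwsync  \n"

static inline void example_release_barrier() {
  // Orders Store|Store and Load|Store ahead of whatever store follows.
  __asm__ __volatile__ (example_strasm_lwsync : : : "memory");
}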

@@ -40,26 +40,26 @@
//
// - sync Two-way memory barrier, aka fence.
// - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
// - eieio orders Store|Store
// - isync Invalidates speculatively executed instructions,
// but isync may complete before storage accesses
// associated with instructions preceding isync have
// been performed.
//
// Semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release orders Store|Store, (maps to lwsync)
// Load|Store
// - acquire orders Load|Store, (maps to lwsync)
// Load|Load
// - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//
#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory");
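// Illustrative sketch (assumption; the file defines its own companions to
// inlasm_sync(), and the names below are invented for the example): lwsync and
// isync wrappers built the same way, onto which the semantic barriers described
// above would map.
#define example_inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory");
#define example_inlasm_isync()  __asm__ __volatile__ ("isync"  : : : "memory");
// release/acquire -> lwsync (Store|Store, Load|Store / Load|Store, Load|Load),
// fence           -> sync   (all four orderings).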

@@ -284,16 +284,18 @@ JVM_handle_linux_signal(int sig,
// in the zero page, because it is filled with 0x0. We ignore
// explicit SIGILLs in the zero page.
if (sig == SIGILL && (pc < (address) 0x200)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_raw_cr("SIGILL happened inside zero page.");
}
goto report_and_die;
}
// Handle signal from NativeJump::patch_verified_entry().
if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
(!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
}
stub = SharedRuntime::get_handle_wrong_method_stub();
}
@@ -304,24 +306,27 @@ JVM_handle_linux_signal(int sig,
// (address)info->si_addr == os::get_standard_polling_page()
// doesn't work for us. We use:
((NativeInstruction*)pc)->is_safepoint_poll()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::get_poll_stub(pc);
}
// SIGTRAP-based ic miss check in compiled code.
else if (sig == SIGTRAP && TrapBasedICMissChecks &&
nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::get_ic_miss_stub();
}
// SIGTRAP-based implicit null check in compiled code.
else if (sig == SIGTRAP && TrapBasedNullChecks &&
nativeInstruction_at(pc)->is_sigtrap_null_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -329,8 +334,9 @@ JVM_handle_linux_signal(int sig,
else if (sig == SIGSEGV && ImplicitNullChecks &&
CodeCache::contains((void*) pc) &&
!MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
@@ -338,8 +344,9 @@ JVM_handle_linux_signal(int sig,
// SIGTRAP-based implicit range check in compiled code.
else if (sig == SIGTRAP && TrapBasedRangeChecks &&
nativeInstruction_at(pc)->is_sigtrap_range_check()) {
if (TraceTraps)
if (TraceTraps) {
tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
}
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
}
#endif