2022-12-18 23:45:17 -08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
# https://www.intel.com/content/dam/develop/public/us/en/documents/325383-sdm-vol-2abcd.pdf
|
2022-12-17 13:39:35 -08:00
|
|
|
module RubyVM::MJIT
|
|
|
|
class X86Assembler
|
2022-12-26 14:09:45 -08:00
|
|
|
class Label < Data.define(:id, :name); end
|
|
|
|
|
2022-12-17 13:39:35 -08:00
|
|
|
ByteWriter = CType::Immediate.parse('char')
|
|
|
|
|
2022-12-18 23:45:17 -08:00
|
|
|
### prefix ###
|
|
|
|
# REX = 0100WR0B
|
|
|
|
REX_W = 0b01001000
|
|
|
|
|
2022-12-26 22:06:43 -08:00
|
|
|
attr_reader :comments
|
|
|
|
|
2022-12-17 13:39:35 -08:00
|
|
|
def initialize
|
|
|
|
@bytes = []
|
2022-12-26 14:09:45 -08:00
|
|
|
@labels = {}
|
2022-12-26 22:06:43 -08:00
|
|
|
@label_id = 0
|
|
|
|
@comments = Hash.new { |h, k| h[k] = [] }
|
2022-12-17 13:39:35 -08:00
|
|
|
end
|
|
|
|
|
2022-12-17 22:00:16 -08:00
|
|
|
def compile(addr)
|
2022-12-26 14:09:45 -08:00
|
|
|
link_labels
|
2022-12-17 22:00:16 -08:00
|
|
|
writer = ByteWriter.new(addr)
|
|
|
|
# If you pack bytes containing \x00, Ruby fails to recognize bytes after \x00.
|
|
|
|
# So writing byte by byte to avoid hitting that situation.
|
|
|
|
@bytes.each_with_index do |byte, index|
|
|
|
|
writer[index] = byte
|
|
|
|
end
|
|
|
|
@bytes.size
|
|
|
|
ensure
|
2022-12-17 13:39:35 -08:00
|
|
|
@bytes.clear
|
|
|
|
end
|
|
|
|
|
2022-12-19 00:09:45 -08:00
|
|
|
def add(dst, src)
|
|
|
|
case [dst, src]
|
2022-12-26 22:46:40 -08:00
|
|
|
# ADD r/m64, imm8 (Mod 11)
|
2022-12-23 10:43:18 -08:00
|
|
|
in [Symbol => dst_reg, Integer => src_imm] if r64?(dst_reg) && imm8?(src_imm)
|
2022-12-19 00:09:45 -08:00
|
|
|
# REX.W + 83 /0 ib
|
|
|
|
# MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x83,
|
|
|
|
mod_rm: mod_rm(mod: 0b11, rm: reg_code(dst_reg)),
|
|
|
|
imm: imm8(src_imm),
|
|
|
|
)
|
2022-12-26 22:46:40 -08:00
|
|
|
# ADD r/m64, imm8 (Mod 00)
|
|
|
|
in [[Symbol => dst_reg], Integer => src_imm] if r64?(dst_reg) && imm8?(src_imm)
|
|
|
|
# REX.W + 83 /0 ib
|
|
|
|
# MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x83,
|
|
|
|
mod_rm: mod_rm(mod: 0b00, rm: reg_code(dst_reg)), # Mod 00: [reg]
|
|
|
|
imm: imm8(src_imm),
|
|
|
|
)
|
2022-12-19 00:09:45 -08:00
|
|
|
else
|
2022-12-23 14:46:39 -08:00
|
|
|
raise NotImplementedError, "add: not-implemented operands: #{dst.inspect}, #{src.inspect}"
|
2022-12-19 00:09:45 -08:00
|
|
|
end
|
2022-12-17 13:39:35 -08:00
|
|
|
end
|
|
|
|
|
2022-12-26 14:09:45 -08:00
|
|
|
# JZ rel8
|
|
|
|
# @param [RubyVM::MJIT::X86Assembler::Label] label
|
|
|
|
def jz(label)
|
|
|
|
# 74 cb
|
|
|
|
insn(opcode: 0x74)
|
|
|
|
@bytes.push(label)
|
|
|
|
end
|
|
|
|
|
2022-12-18 23:45:17 -08:00
|
|
|
def mov(dst, src)
|
2022-12-28 13:16:02 -08:00
|
|
|
case dst
|
|
|
|
in Symbol => dst_reg
|
|
|
|
case src
|
|
|
|
# MOV r64, r/m64 (Mod 00)
|
|
|
|
in [Symbol => src_reg] if r64?(dst_reg) && r64?(src_reg)
|
|
|
|
# REX.W + 8B /r
|
|
|
|
# RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x8b,
|
|
|
|
mod_rm: mod_rm(mod: 0b00, reg: reg_code(dst_reg), rm: reg_code(src_reg)), # Mod 00: [reg]
|
|
|
|
)
|
|
|
|
# MOV r32 r/m32 (Mod 01)
|
|
|
|
in [Symbol => src_reg, Integer => src_disp] if r32?(dst_reg) && imm8?(src_disp)
|
|
|
|
# 8B /r
|
|
|
|
# RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
|
|
|
|
insn(
|
|
|
|
opcode: 0x8b,
|
|
|
|
mod_rm: mod_rm(mod: 0b01, reg: reg_code(dst_reg), rm: reg_code(src_reg)), # Mod 01: [reg]+disp8
|
|
|
|
disp: src_disp,
|
|
|
|
)
|
|
|
|
# MOV r64, r/m64 (Mod 01)
|
|
|
|
in [Symbol => src_reg, Integer => src_disp] if r64?(dst_reg) && r64?(src_reg) && imm8?(src_disp)
|
|
|
|
# REX.W + 8B /r
|
|
|
|
# RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x8b,
|
|
|
|
mod_rm: mod_rm(mod: 0b01, reg: reg_code(dst_reg), rm: reg_code(src_reg)), # Mod 01: [reg]+disp8
|
|
|
|
disp: src_disp,
|
|
|
|
)
|
|
|
|
# MOV r/m64, imm32 (Mod 11)
|
|
|
|
in Integer => src_imm if r64?(dst_reg) && imm32?(src_imm)
|
|
|
|
# REX.W + C7 /0 id
|
|
|
|
# MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0xc7,
|
|
|
|
mod_rm: mod_rm(mod: 0b11, rm: reg_code(dst_reg)), # Mod 11: reg
|
|
|
|
imm: imm32(src_imm),
|
|
|
|
)
|
|
|
|
# MOV r64, imm64
|
|
|
|
in Integer => src_imm if r64?(dst_reg) && imm64?(src_imm)
|
|
|
|
# REX.W + B8+ rd io
|
|
|
|
# OI: Operand 1: opcode + rd (w), Operand 2: imm8/16/32/64
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0xb8 + reg_code(dst_reg),
|
|
|
|
imm: imm64(src_imm),
|
|
|
|
)
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "mov: not-implemented operands: #{dst.inspect}, #{src.inspect}"
|
|
|
|
end
|
|
|
|
in [Symbol => dst_reg]
|
|
|
|
case src
|
|
|
|
# MOV r/m64, imm32 (Mod 00)
|
|
|
|
in Integer => src_imm if r64?(dst_reg) && imm32?(src_imm)
|
|
|
|
# REX.W + C7 /0 id
|
|
|
|
# MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0xc7,
|
|
|
|
mod_rm: mod_rm(mod: 0b00, rm: reg_code(dst_reg)), # Mod 00: [reg]
|
|
|
|
imm: imm32(src_imm),
|
|
|
|
)
|
2022-12-28 13:16:14 -08:00
|
|
|
# MOV r/m64, r64 (Mod 00)
|
|
|
|
in Symbol => src_reg if r64?(dst_reg) && r64?(src_reg)
|
|
|
|
# REX.W + 89 /r
|
|
|
|
# MR: Operand 1: ModRM:r/m (w), Operand 2: ModRM:reg (r)
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x89,
|
|
|
|
mod_rm: mod_rm(mod: 0b00, reg: reg_code(src_reg), rm: reg_code(dst_reg)), # Mod 00: [reg]
|
|
|
|
)
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "mov: not-implemented operands: #{dst.inspect}, #{src.inspect}"
|
2022-12-28 13:16:02 -08:00
|
|
|
end
|
|
|
|
in [Symbol => dst_reg, Integer => dst_disp]
|
2022-12-28 13:16:14 -08:00
|
|
|
# Optimize encoding when disp is 0
|
|
|
|
return mov([dst_reg], src) if dst_disp == 0
|
|
|
|
|
2022-12-28 13:16:02 -08:00
|
|
|
case src
|
2022-12-28 13:16:14 -08:00
|
|
|
# MOV r/m64, imm32 (Mod 01)
|
|
|
|
in Integer => src_imm if r64?(dst_reg) && imm8?(dst_disp) && imm32?(src_imm)
|
|
|
|
# REX.W + C7 /0 id
|
|
|
|
# MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0xc7,
|
|
|
|
mod_rm: mod_rm(mod: 0b01, rm: reg_code(dst_reg)), # Mod 01: [reg]+disp8
|
|
|
|
disp: dst_disp,
|
|
|
|
imm: imm32(src_imm),
|
|
|
|
)
|
|
|
|
# MOV r/m64, r64 (Mod 01)
|
2022-12-28 13:16:02 -08:00
|
|
|
in Symbol => src_reg if r64?(dst_reg) && imm8?(dst_disp) && r64?(src_reg)
|
|
|
|
# REX.W + 89 /r
|
|
|
|
# MR: Operand 1: ModRM:r/m (w), Operand 2: ModRM:reg (r)
|
|
|
|
insn(
|
|
|
|
prefix: REX_W,
|
|
|
|
opcode: 0x89,
|
|
|
|
mod_rm: mod_rm(mod: 0b01, reg: reg_code(src_reg), rm: reg_code(dst_reg)), # Mod 01: [reg]+disp8
|
|
|
|
disp: dst_disp,
|
|
|
|
)
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "mov: not-implemented operands: #{dst.inspect}, #{src.inspect}"
|
|
|
|
end
|
2022-12-17 13:39:35 -08:00
|
|
|
else
|
2022-12-23 14:46:39 -08:00
|
|
|
raise NotImplementedError, "mov: not-implemented operands: #{dst.inspect}, #{src.inspect}"
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def push(src)
|
|
|
|
case src
|
|
|
|
# PUSH r64
|
|
|
|
in Symbol => src_reg if r64?(src_reg)
|
|
|
|
# 50+rd
|
|
|
|
# O: Operand 1: opcode + rd (r)
|
|
|
|
insn(opcode: 0x50 + reg_code(src_reg))
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "push: not-implemented operands: #{src.inspect}"
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def pop(dst)
|
|
|
|
case dst
|
|
|
|
# POP r64
|
|
|
|
in Symbol => dst_reg if r64?(dst_reg)
|
|
|
|
# 58+ rd
|
|
|
|
# O: Operand 1: opcode + rd (r)
|
|
|
|
insn(opcode: 0x58 + reg_code(dst_reg))
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "pop: not-implemented operands: #{dst.inspect}"
|
2022-12-17 13:39:35 -08:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2022-12-18 23:45:17 -08:00
|
|
|
# RET
|
2022-12-17 13:39:35 -08:00
|
|
|
def ret
|
2022-12-18 23:45:17 -08:00
|
|
|
# Near return: A return to a procedure within the current code segment
|
|
|
|
insn(opcode: 0xc3)
|
|
|
|
end
|
|
|
|
|
2022-12-26 14:09:45 -08:00
|
|
|
def test(left, right)
|
|
|
|
case [left, right]
|
|
|
|
# TEST r/m32, r32 (Mod 11)
|
|
|
|
in [Symbol => left_reg, Symbol => right_reg] if r32?(left_reg) && r32?(right_reg)
|
|
|
|
# 85 /r
|
|
|
|
# MR: Operand 1: ModRM:r/m (r), Operand 2: ModRM:reg (r)
|
|
|
|
insn(
|
|
|
|
opcode: 0x85,
|
|
|
|
mod_rm: mod_rm(mod: 0b11, reg: reg_code(right_reg), rm: reg_code(left_reg)), # Mod 11: reg
|
|
|
|
)
|
|
|
|
else
|
|
|
|
raise NotImplementedError, "pop: not-implemented operands: #{dst.inspect}"
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2022-12-26 22:06:43 -08:00
|
|
|
def comment(message)
|
|
|
|
@comments[@bytes.size] << message
|
|
|
|
end
|
|
|
|
|
2022-12-26 14:09:45 -08:00
|
|
|
def new_label(name)
|
|
|
|
Label.new(id: @label_id += 1, name:)
|
|
|
|
end
|
|
|
|
|
|
|
|
# @param [RubyVM::MJIT::X86Assembler::Label] label
|
|
|
|
def write_label(label)
|
|
|
|
@labels[label] = @bytes.size
|
|
|
|
end
|
|
|
|
|
2022-12-26 22:46:40 -08:00
|
|
|
def incr_counter(name)
|
|
|
|
if C.mjit_opts.stats
|
|
|
|
comment("increment counter #{name}")
|
|
|
|
mov(:rax, C.rb_mjit_counters[name].to_i)
|
|
|
|
add([:rax], 1) # TODO: lock
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2022-12-18 23:45:17 -08:00
|
|
|
private
|
|
|
|
|
|
|
|
def insn(prefix: nil, opcode:, mod_rm: nil, disp: nil, imm: nil)
|
|
|
|
if prefix
|
|
|
|
@bytes.push(prefix)
|
|
|
|
end
|
|
|
|
@bytes.push(opcode)
|
|
|
|
if mod_rm
|
|
|
|
@bytes.push(mod_rm)
|
|
|
|
end
|
|
|
|
if disp
|
|
|
|
if disp < 0 || disp > 0xff # TODO: support displacement in 2 or 4 bytes as well
|
|
|
|
raise NotImplementedError, "not-implemented disp: #{disp}"
|
|
|
|
end
|
|
|
|
@bytes.push(disp)
|
|
|
|
end
|
|
|
|
if imm
|
|
|
|
@bytes.push(*imm)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def reg_code(reg)
|
|
|
|
case reg
|
|
|
|
when :al, :ax, :eax, :rax then 0
|
|
|
|
when :cl, :cx, :ecx, :rcx then 1
|
|
|
|
when :dl, :dx, :edx, :rdx then 2
|
|
|
|
when :bl, :bx, :ebx, :rbx then 3
|
|
|
|
when :ah, :sp, :esp, :rsp then 4
|
|
|
|
when :ch, :bp, :ebp, :rbp then 5
|
|
|
|
when :dh, :si, :esi, :rsi then 6
|
|
|
|
when :bh, :di, :edi, :rdi then 7
|
|
|
|
else raise ArgumentError, "unexpected reg: #{reg.inspect}"
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Table 2-2. 32-Bit Addressing Forms with the ModR/M Byte
|
|
|
|
#
|
|
|
|
# 7 6 5 4 3 2 1 0
|
|
|
|
# +--+--+--+--+--+--+--+--+
|
|
|
|
# | Mod | Reg/ | R/M |
|
|
|
|
# | | Opcode | |
|
|
|
|
# +--+--+--+--+--+--+--+--+
|
|
|
|
#
|
|
|
|
# The r/m field can specify a register as an operand or it can be combined
|
|
|
|
# with the mod field to encode an addressing mode.
|
|
|
|
#
|
|
|
|
# /0: R/M is 0 (not used)
|
|
|
|
# /r: R/M is a register
|
2022-12-19 00:09:45 -08:00
|
|
|
def mod_rm(mod:, reg: 0, rm: 0)
|
2022-12-18 23:45:17 -08:00
|
|
|
if mod > 0b11
|
|
|
|
raise ArgumentError, "too large Mod: #{mod}"
|
|
|
|
end
|
|
|
|
if reg > 0b111
|
|
|
|
raise ArgumentError, "too large Reg/Opcode: #{reg}"
|
|
|
|
end
|
|
|
|
if rm > 0b111
|
|
|
|
raise ArgumentError, "too large R/M: #{rm}"
|
|
|
|
end
|
|
|
|
(mod << 6) + (reg << 3) + rm
|
|
|
|
end
|
|
|
|
|
2022-12-19 00:09:45 -08:00
|
|
|
# ib: 1 byte
|
|
|
|
def imm8(imm)
|
2022-12-23 10:43:18 -08:00
|
|
|
unless imm8?(imm)
|
2022-12-19 00:09:45 -08:00
|
|
|
raise ArgumentError, "unexpected imm8: #{imm}"
|
|
|
|
end
|
2022-12-23 10:43:18 -08:00
|
|
|
imm_bytes(imm, 1)
|
2022-12-19 00:09:45 -08:00
|
|
|
end
|
|
|
|
|
2022-12-18 23:45:17 -08:00
|
|
|
# id: 4 bytes
|
|
|
|
def imm32(imm)
|
2022-12-23 10:43:18 -08:00
|
|
|
unless imm32?(imm)
|
|
|
|
raise ArgumentError, "unexpected imm32: #{imm}"
|
|
|
|
end
|
|
|
|
imm_bytes(imm, 4)
|
|
|
|
end
|
|
|
|
|
|
|
|
# io: 8 bytes
|
|
|
|
def imm64(imm)
|
|
|
|
unless imm64?(imm)
|
|
|
|
raise ArgumentError, "unexpected imm64: #{imm}"
|
|
|
|
end
|
|
|
|
imm_bytes(imm, 8)
|
|
|
|
end
|
|
|
|
|
|
|
|
def imm_bytes(imm, num_bytes)
|
2022-12-18 23:45:17 -08:00
|
|
|
bytes = []
|
|
|
|
bits = imm
|
2022-12-23 10:43:18 -08:00
|
|
|
num_bytes.times do
|
2022-12-18 23:45:17 -08:00
|
|
|
bytes << (bits & 0xff)
|
|
|
|
bits >>= 8
|
|
|
|
end
|
|
|
|
if bits != 0
|
2022-12-23 10:43:18 -08:00
|
|
|
raise ArgumentError, "unexpected imm with #{num_bytes} bytes: #{imm}"
|
2022-12-18 23:45:17 -08:00
|
|
|
end
|
|
|
|
bytes
|
|
|
|
end
|
|
|
|
|
2022-12-23 10:43:18 -08:00
|
|
|
def imm8?(imm)
|
2022-12-26 14:09:45 -08:00
|
|
|
raise "negative imm not supported: #{imm}" if imm.negative? # TODO: support this
|
|
|
|
imm <= 0x7f # TODO: consider uimm
|
2022-12-23 10:43:18 -08:00
|
|
|
end
|
|
|
|
|
|
|
|
def imm32?(imm)
|
2022-12-26 14:09:45 -08:00
|
|
|
raise "negative imm not supported: #{imm}" if imm.negative? # TODO: support this
|
2022-12-23 10:43:18 -08:00
|
|
|
# TODO: consider rejecting small values
|
2022-12-26 14:09:45 -08:00
|
|
|
imm <= 0x7fff_ffff # TODO: consider uimm
|
2022-12-23 10:43:18 -08:00
|
|
|
end
|
|
|
|
|
|
|
|
def imm64?(imm)
|
2022-12-26 14:09:45 -08:00
|
|
|
raise "negative imm not supported: #{imm}" if imm.negative? # TODO: support this
|
2022-12-23 10:43:18 -08:00
|
|
|
# TODO: consider rejecting small values
|
2022-12-26 14:09:45 -08:00
|
|
|
imm <= 0x7fff_ffff_ffff_ffff # TODO: consider uimm
|
|
|
|
end
|
|
|
|
|
|
|
|
def r32?(reg)
|
|
|
|
reg.start_with?('e')
|
2022-12-23 10:43:18 -08:00
|
|
|
end
|
|
|
|
|
|
|
|
def r64?(reg)
|
2022-12-18 23:45:17 -08:00
|
|
|
reg.start_with?('r')
|
2022-12-17 13:39:35 -08:00
|
|
|
end
|
2022-12-26 14:09:45 -08:00
|
|
|
|
|
|
|
def link_labels
|
|
|
|
@bytes.each_with_index do |byte, index|
|
|
|
|
if byte.is_a?(Label)
|
|
|
|
src_index = index + 1 # offset 1 byte for rel8 itself
|
|
|
|
dst_index = @labels.fetch(byte)
|
|
|
|
rel8 = dst_index - src_index
|
|
|
|
raise "unexpected offset: #{rel8}" unless imm8?(rel8)
|
|
|
|
@bytes[index] = rel8
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
2022-12-17 13:39:35 -08:00
|
|
|
end
|
|
|
|
end
|