More floating point operators.

losfair 2019-04-09 17:08:31 +08:00
parent e32816b06b
commit f781b0eb02
2 changed files with 123 additions and 3 deletions

@ -571,6 +571,17 @@ impl X64FunctionCode {
src1: Location,
src2: Location,
dst: Location,
) {
Self::emit_relaxed_avx_base(a, m, |a, _, src1, src2, dst| op(a, src1, src2, dst), src1, src2, dst)
}
fn emit_relaxed_avx_base<F: FnOnce(&mut Assembler, &mut Machine, XMM, XMMOrMemory, XMM)>(
a: &mut Assembler,
m: &mut Machine,
op: F,
src1: Location,
src2: Location,
dst: Location,
) {
let tmp1 = m.acquire_temp_xmm().unwrap();
let tmp2 = m.acquire_temp_xmm().unwrap();
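
The wrapper above adapts a four-argument AVX emitter to the five-argument emit_relaxed_avx_base callback by dropping the &mut Machine parameter inside a closure. A minimal standalone sketch of that adapter pattern, using simplified stand-in types rather than the crate's real Assembler/Machine/XMM definitions:

    // Stand-in types for illustration only; the real crate's definitions differ.
    struct Assembler;
    struct Machine;
    #[derive(Clone, Copy)]
    struct XMM(u8);
    #[derive(Clone, Copy)]
    enum XMMOrMemory { XMM(XMM) }

    // Base helper: the callback may use the Machine (e.g. for extra temporaries).
    fn relaxed_avx_base<F: FnOnce(&mut Assembler, &mut Machine, XMM, XMMOrMemory, XMM)>(
        a: &mut Assembler,
        m: &mut Machine,
        op: F,
        src1: XMM,
        src2: XMMOrMemory,
        dst: XMM,
    ) {
        op(a, m, src1, src2, dst);
    }

    // Thin wrapper for ops that do not need the Machine: the closure ignores `m`.
    fn relaxed_avx(
        a: &mut Assembler,
        m: &mut Machine,
        op: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
        src1: XMM,
        src2: XMMOrMemory,
        dst: XMM,
    ) {
        relaxed_avx_base(a, m, |a, _, s1, s2, d| op(a, s1, s2, d), src1, src2, dst)
    }

    fn main() {
        let (mut a, mut m) = (Assembler, Machine);
        // Dummy four-argument op standing in for e.g. Assembler::emit_vaddss.
        fn dummy(_a: &mut Assembler, _s1: XMM, _s2: XMMOrMemory, _d: XMM) {}
        relaxed_avx(&mut a, &mut m, dummy, XMM(0), XMMOrMemory::XMM(XMM(1)), XMM(2));
    }
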
@ -618,10 +629,10 @@ impl X64FunctionCode {
match dst {
Location::XMM(x) => {
op(a, src1, src2, x);
op(a, m, src1, src2, x);
},
Location::Memory(_, _) => {
op(a, src1, src2, tmp3);
op(a, m, src1, src2, tmp3);
a.emit_mov(Size::S64, Location::XMM(tmp3), dst);
},
_ => unreachable!(),
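
In the hunk above, the callback now also receives the Machine, and the result write is dispatched on the destination: an XMM destination is written directly, while a memory destination is produced in the scratch register tmp3 and then stored with a 64-bit move. A simplified sketch of that dispatch shape, with stand-in types (not the crate's):

    #[derive(Clone, Copy)]
    enum Loc {
        Xmm(u8),
        Memory(i32),
    }

    // Register destinations get the result directly; memory goes through a scratch XMM.
    fn write_result(
        dst: Loc,
        scratch_xmm: u8,
        mut compute_into: impl FnMut(u8),
        mut store_to_mem: impl FnMut(u8, i32),
    ) {
        match dst {
            Loc::Xmm(x) => compute_into(x),
            Loc::Memory(offset) => {
                compute_into(scratch_xmm);
                store_to_mem(scratch_xmm, offset);
            }
        }
    }

    fn main() {
        write_result(
            Loc::Memory(16),
            3,
            |x| println!("op -> xmm{}", x),
            |x, off| println!("mov [base+{}], xmm{}", off, x),
        );
    }
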
@ -1422,7 +1433,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
Size::S32, loc, ret,
);
}
Operator::F32Const { value } => self.value_stack.push((Location::Imm32(value.bits()), LocalOrTemp::Temp)),
Operator::F32Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddss),
Operator::F32Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubss),
@ -1442,6 +1453,43 @@ impl FunctionCodeGenerator for X64FunctionCode {
Operator::F32Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_trunc),
Operator::F32Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtss),
Operator::F32Copysign => {
let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0];
let tmp1 = self.machine.acquire_temp_gpr().unwrap();
let tmp2 = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1));
a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2));
a.emit_and(Size::S32, Location::Imm32(0x7fffffffu32), Location::GPR(tmp1));
a.emit_and(Size::S32, Location::Imm32(0x80000000u32), Location::GPR(tmp2));
a.emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1));
a.emit_mov(Size::S32, Location::GPR(tmp1), ret);
self.machine.release_temp_gpr(tmp2);
self.machine.release_temp_gpr(tmp1);
}
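
The F32Copysign lowering works on the raw bit pattern: the magnitude bits of the first operand (mask 0x7fffffff) are OR-ed with the sign bit of the second (mask 0x80000000). A plain-Rust sketch of the same bit trick, checked against the standard f32::copysign for illustration:

    // f32 copysign on the raw bits, mirroring the emitted AND/AND/OR sequence.
    fn copysign_bits_f32(a: f32, b: f32) -> f32 {
        let magnitude = a.to_bits() & 0x7fff_ffff; // keep exponent + mantissa of `a`
        let sign = b.to_bits() & 0x8000_0000;      // keep only the sign bit of `b`
        f32::from_bits(magnitude | sign)
    }

    fn main() {
        assert_eq!(copysign_bits_f32(1.5, -2.0), (1.5f32).copysign(-2.0));
        assert_eq!(copysign_bits_f32(-3.25, 4.0), (-3.25f32).copysign(4.0));
        println!("ok");
    }
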
Operator::F32Abs => {
let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0];
let tmp = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S32, loc, Location::GPR(tmp));
a.emit_and(Size::S32, Location::Imm32(0x7fffffffu32), Location::GPR(tmp));
a.emit_mov(Size::S32, Location::GPR(tmp), ret);
self.machine.release_temp_gpr(tmp);
}
Operator::F32Neg => {
let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0];
let tmp = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S32, loc, Location::GPR(tmp));
a.emit_btc_gpr_imm8_32(31, tmp);
a.emit_mov(Size::S32, Location::GPR(tmp), ret);
self.machine.release_temp_gpr(tmp);
}
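
F32Abs clears bit 31 with the same 0x7fffffff mask, and F32Neg toggles bit 31, which is what the new emit_btc_gpr_imm8_32(31, tmp) produces at the machine level. A plain-Rust equivalent of the two bit operations:

    // f32 abs/neg on the raw bit pattern, mirroring the emitted mask and the BTC on bit 31.
    fn abs_bits_f32(x: f32) -> f32 {
        f32::from_bits(x.to_bits() & 0x7fff_ffff) // clear the sign bit
    }

    fn neg_bits_f32(x: f32) -> f32 {
        f32::from_bits(x.to_bits() ^ (1 << 31))   // flip the sign bit, like BTC with index 31
    }

    fn main() {
        assert_eq!(abs_bits_f32(-2.5), 2.5);
        assert_eq!(neg_bits_f32(2.5), -2.5);
        assert!(neg_bits_f32(f32::NAN).is_nan()); // NaN stays NaN: only the sign bit changes
        println!("ok");
    }
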
Operator::F64Const { value } => self.value_stack.push((Location::Imm64(value.bits()), LocalOrTemp::Temp)),
Operator::F64Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddsd),
Operator::F64Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubsd),
@ -1461,6 +1509,61 @@ impl FunctionCodeGenerator for X64FunctionCode {
Operator::F64Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_trunc),
Operator::F64Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtsd),
Operator::F64Copysign => {
let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0];
let tmp1 = self.machine.acquire_temp_gpr().unwrap();
let tmp2 = self.machine.acquire_temp_gpr().unwrap();
let c = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1));
a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2));
a.emit_mov(Size::S64, Location::Imm64(0x7fffffffffffffffu64), Location::GPR(c));
a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1));
a.emit_mov(Size::S64, Location::Imm64(0x8000000000000000u64), Location::GPR(c));
a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2));
a.emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1));
a.emit_mov(Size::S64, Location::GPR(tmp1), ret);
self.machine.release_temp_gpr(c);
self.machine.release_temp_gpr(tmp2);
self.machine.release_temp_gpr(tmp1);
}
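
The F64 variant uses the same trick with 64-bit masks. The extra scratch GPR c is presumably needed because x86-64 ALU instructions such as AND only accept 32-bit (sign-extended) immediates, so the 64-bit constants are first materialized with MOV. For reference, the bit-level operation in plain Rust:

    // f64 copysign with 64-bit masks; the generated code loads these constants into a
    // scratch GPR with MOV before the AND, since AND cannot take a 64-bit immediate.
    fn copysign_bits_f64(a: f64, b: f64) -> f64 {
        let magnitude = a.to_bits() & 0x7fff_ffff_ffff_ffff;
        let sign = b.to_bits() & 0x8000_0000_0000_0000;
        f64::from_bits(magnitude | sign)
    }

    fn main() {
        assert_eq!(copysign_bits_f64(1.5, -0.0), (1.5f64).copysign(-0.0));
        println!("ok");
    }
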
Operator::F64Abs => {
let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0];
let tmp = self.machine.acquire_temp_gpr().unwrap();
let c = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S64, loc, Location::GPR(tmp));
a.emit_mov(Size::S64, Location::Imm64(0x7fffffffffffffffu64), Location::GPR(c));
a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp));
a.emit_mov(Size::S64, Location::GPR(tmp), ret);
self.machine.release_temp_gpr(c);
self.machine.release_temp_gpr(tmp);
}
Operator::F64Neg => {
let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0];
let tmp = self.machine.acquire_temp_gpr().unwrap();
a.emit_mov(Size::S64, loc, Location::GPR(tmp));
a.emit_btc_gpr_imm8_64(63, tmp);
a.emit_mov(Size::S64, Location::GPR(tmp), ret);
self.machine.release_temp_gpr(tmp);
}
Operator::F64PromoteF32 => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcvtss2sd),
Operator::F32DemoteF64 => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcvtsd2ss),
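
F64PromoteF32 and F32DemoteF64 map onto the scalar conversions VCVTSS2SD and VCVTSD2SS. Ignoring NaN payload details, the same semantics in plain Rust are just the float casts, shown here only as an illustration of the expected behavior:

    // Scalar float conversions with the semantics of the promote/demote operators.
    fn promote(x: f32) -> f64 { x as f64 } // exact: every f32 value is representable as f64
    fn demote(x: f64) -> f32 { x as f32 }  // rounds to the nearest f32

    fn main() {
        assert_eq!(promote(0.1f32) as f32, 0.1f32);   // f32 -> f64 -> f32 round-trips exactly
        assert_eq!(demote(1.0000000001f64), 1.0f32);  // nearest f32 to the f64 input
        println!("ok");
    }
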
Operator::Call { function_index } => {
let function_index = function_index as usize;
let label = self

@ -116,6 +116,9 @@ pub trait Emitter {
fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location);
fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location);
fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR);
fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR);
fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
@ -159,6 +162,9 @@ pub trait Emitter {
fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
fn emit_ud2(&mut self);
fn emit_ret(&mut self);
fn emit_call_label(&mut self, label: Self::Label);
@ -661,6 +667,14 @@ impl Emitter for Assembler {
}
}
fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) {
dynasm!(self ; btc Rd(dst as u8), BYTE (src as i8));
}
fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) {
dynasm!(self ; btc Rq(dst as u8), BYTE (src as i8));
}
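
BTC (bit test and complement) flips the selected bit of the destination operand and reports its previous value in CF; the F32Neg/F64Neg lowerings above rely only on the complement, using bit index 31 or 63 to flip the float's sign. A small software model of the 32-bit form:

    // Software model of BTC r32, imm8: complement bit `idx`, return the bit's previous
    // value (which the real instruction reports in CF; the codegen only uses the flip).
    fn btc32(dst: &mut u32, idx: u8) -> bool {
        let old = (*dst >> idx) & 1 != 0;
        *dst ^= 1u32 << idx;
        old
    }

    fn main() {
        let mut bits = 2.5f32.to_bits();
        let was_negative = btc32(&mut bits, 31); // toggle the sign bit, as F32Neg does
        assert!(!was_negative);
        assert_eq!(f32::from_bits(bits), -2.5);
        println!("ok");
    }
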
avx_fn!(vaddss, emit_vaddss);
avx_fn!(vaddsd, emit_vaddsd);
@ -700,6 +714,9 @@ impl Emitter for Assembler {
avx_fn!(vsqrtss, emit_vsqrtss);
avx_fn!(vsqrtsd, emit_vsqrtsd);
avx_fn!(vcvtss2sd, emit_vcvtss2sd);
avx_fn!(vcvtsd2ss, emit_vcvtsd2ss);
avx_round_fn!(vroundss, emit_vroundss_nearest, 0);
avx_round_fn!(vroundss, emit_vroundss_floor, 1);
avx_round_fn!(vroundss, emit_vroundss_ceil, 2);
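
The numeric third argument to avx_round_fn is presumably the imm8 rounding control passed to (V)ROUNDSS/ROUNDSD: 0 selects round-to-nearest-even, 1 floor, 2 ceil, and 3 (used by the _trunc variants) truncation toward zero. A plain-Rust model of those four modes (round_ties_even needs Rust 1.77+):

    // Model of the low two imm8 bits of (V)ROUNDSS, matching the macro arguments above.
    fn round_scalar_f32(x: f32, imm: u8) -> f32 {
        match imm & 0b11 {
            0 => x.round_ties_even(), // nearest, ties to even
            1 => x.floor(),           // toward negative infinity
            2 => x.ceil(),            // toward positive infinity
            _ => x.trunc(),           // toward zero
        }
    }

    fn main() {
        assert_eq!(round_scalar_f32(2.5, 0), 2.0);   // ties to even
        assert_eq!(round_scalar_f32(-1.5, 1), -2.0); // floor
        assert_eq!(round_scalar_f32(-1.5, 2), -1.0); // ceil
        assert_eq!(round_scalar_f32(-1.5, 3), -1.0); // trunc
        println!("ok");
    }
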