fluencelabs/wasmer, commit e32816b06b (parent 154f7f8fd3)

Floating point with AVX.
@@ -525,6 +525,7 @@ impl X64FunctionCode {
            (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,
            (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::GPR(_)) if (op as *const u8 != Assembler::emit_mov as *const u8) => RelaxMode::SrcToGPR,
            (_, Location::XMM(_)) => RelaxMode::SrcToGPR,
            _ if (op as *const u8 == Assembler::emit_imul as *const u8) => RelaxMode::BothToGPR, // TODO: optimize this
            _ => RelaxMode::Direct,
        };
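For context, RelaxMode is the dispatch used to rewrite operand pairs the target instruction cannot encode directly; the new arm above routes any XMM destination through a scratch GPR. A minimal sketch of the idea, with variant meanings inferred from the arms above (the real enum lives elsewhere in this file):

// Sketch only; variant names are from the match above, comments are inferred.
enum RelaxMode {
    Direct,    // operand pair is encodable as-is
    SrcToGPR,  // stage src in a temp GPR, then emit op(tmp, dst)
    DstToGPR,  // stage dst in a temp GPR, emit op(src, tmp), write back
    BothToGPR, // stage both operands (e.g. imul's restrictive forms)
}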
@@ -563,6 +564,75 @@ impl X64FunctionCode {
        }
    }

    fn emit_relaxed_avx(
        a: &mut Assembler,
        m: &mut Machine,
        op: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
        src1: Location,
        src2: Location,
        dst: Location,
    ) {
        let tmp1 = m.acquire_temp_xmm().unwrap();
        let tmp2 = m.acquire_temp_xmm().unwrap();
        let tmp3 = m.acquire_temp_xmm().unwrap();
        let tmpg = m.acquire_temp_gpr().unwrap();

        let src1 = match src1 {
            Location::XMM(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                a.emit_mov(Size::S64, src1, Location::XMM(tmp1));
                tmp1
            }
            Location::Imm32(_) => {
                a.emit_mov(Size::S32, src1, Location::GPR(tmpg));
                a.emit_mov(Size::S32, Location::GPR(tmpg), Location::XMM(tmp1));
                tmp1
            }
            Location::Imm64(_) => {
                a.emit_mov(Size::S64, src1, Location::GPR(tmpg));
                a.emit_mov(Size::S64, Location::GPR(tmpg), Location::XMM(tmp1));
                tmp1
            }
            _ => unreachable!(),
        };

        let src2 = match src2 {
            Location::XMM(x) => XMMOrMemory::XMM(x),
            Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
            Location::GPR(_) => {
                a.emit_mov(Size::S64, src2, Location::XMM(tmp2));
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm32(_) => {
                a.emit_mov(Size::S32, src2, Location::GPR(tmpg));
                a.emit_mov(Size::S32, Location::GPR(tmpg), Location::XMM(tmp2));
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm64(_) => {
                a.emit_mov(Size::S64, src2, Location::GPR(tmpg));
                a.emit_mov(Size::S64, Location::GPR(tmpg), Location::XMM(tmp2));
                XMMOrMemory::XMM(tmp2)
            }
            _ => unreachable!(),
        };

        match dst {
            Location::XMM(x) => {
                op(a, src1, src2, x);
            },
            Location::Memory(_, _) => {
                op(a, src1, src2, tmp3);
                a.emit_mov(Size::S64, Location::XMM(tmp3), dst);
            },
            _ => unreachable!(),
        }

        m.release_temp_gpr(tmpg);
        m.release_temp_xmm(tmp3);
        m.release_temp_xmm(tmp2);
        m.release_temp_xmm(tmp1);
    }

    fn emit_binop_i32(
        a: &mut Assembler,
        m: &mut Machine,
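emit_relaxed_avx normalizes both operands before dispatching to the raw emitter: src1 must end up in an XMM register, src2 may stay in memory, and immediates are materialized via a GPR-to-XMM bit move (the emit_mov from a GPR to an XMM, presumably a movd/movq-style transfer). A hedged usage sketch; the concrete locations and registers here are invented for illustration:

// f64 add of a stack slot and a constant; dst already in a register.
Self::emit_relaxed_avx(
    a, m,
    Assembler::emit_vaddsd,
    Location::Memory(GPR::RBP, -16),     // src1: loaded into tmp1
    Location::Imm64(0x3FF0000000000000), // bits of 1.0f64, staged via tmpg into tmp2
    Location::XMM(XMM::XMM0),            // dst: written directly by vaddsd
);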
@@ -849,6 +919,33 @@ impl X64FunctionCode {
        value_stack.push((ret, LocalOrTemp::Temp));
    }

    fn emit_fp_binop_avx(
        a: &mut Assembler,
        m: &mut Machine,
        value_stack: &mut Vec<(Location, LocalOrTemp)>,
        f: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
    ) {
        let loc_b = get_location_released(a, m, value_stack.pop().unwrap());
        let loc_a = get_location_released(a, m, value_stack.pop().unwrap());
        let ret = m.acquire_locations(a, &[WpType::F64], false)[0];
        value_stack.push((ret, LocalOrTemp::Temp));

        Self::emit_relaxed_avx(a, m, f, loc_a, loc_b, ret);
    }

    fn emit_fp_unop_avx(
        a: &mut Assembler,
        m: &mut Machine,
        value_stack: &mut Vec<(Location, LocalOrTemp)>,
        f: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
    ) {
        let loc = get_location_released(a, m, value_stack.pop().unwrap());
        let ret = m.acquire_locations(a, &[WpType::F64], false)[0];
        value_stack.push((ret, LocalOrTemp::Temp));

        Self::emit_relaxed_avx(a, m, f, loc, loc, ret);
    }

    // This function must not use any temporary register before `cb` is called.
    fn emit_call_sysv<I: Iterator<Item = Location>, F: FnOnce(&mut Assembler)>(a: &mut Assembler, m: &mut Machine, cb: F, params: I) {
        let params: Vec<_> = params.collect();
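Both helpers pop operands from the simulated value stack and push the result location before emitting code. Note that emit_fp_unop_avx passes loc as both sources: scalar AVX instructions are three-operand, computing on the low lane of src2 while copying the upper lanes of src1 into dst, so reusing the same input is the natural encoding of a one-operand op such as sqrt:

// vsqrtss dst, src1, src2 semantics:
//   dst[31:0]   = sqrt(src2[31:0])
//   dst[127:32] = src1[127:32]
// hence emit_relaxed_avx(a, m, f, loc, loc, ret) for unary ops.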
@@ -859,6 +956,15 @@ impl X64FunctionCode {
            a.emit_push(Size::S64, Location::GPR(*r));
        }

        // Save used XMM registers.
        let used_xmms = m.get_used_xmms();
        if used_xmms.len() > 0 {
            a.emit_sub(Size::S64, Location::Imm32((used_xmms.len() * 8) as u32), Location::GPR(GPR::RSP));
            for (i, r) in used_xmms.iter().enumerate() {
                a.emit_mov(Size::S64, Location::XMM(*r), Location::Memory(GPR::RSP, (i * 8) as i32));
            }
        }

        let mut stack_offset: usize = 0;

        // Calculate stack offset.
@@ -914,6 +1020,14 @@ impl X64FunctionCode {
            a.emit_add(Size::S64, Location::Imm32(stack_offset as u32), Location::GPR(GPR::RSP));
        }

        // Restore XMMs.
        if used_xmms.len() > 0 {
            for (i, r) in used_xmms.iter().enumerate() {
                a.emit_mov(Size::S64, Location::Memory(GPR::RSP, (i * 8) as i32), Location::XMM(*r));
            }
            a.emit_add(Size::S64, Location::Imm32((used_xmms.len() * 8) as u32), Location::GPR(GPR::RSP));
        }

        // Restore GPRs.
        for r in used_gprs.iter().rev() {
            a.emit_pop(Size::S64, Location::GPR(*r));
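The XMM save/restore pair mirrors the GPR push/pop around it: one RSP adjustment of 8 bytes per live register, each XMM stored and reloaded through a 64-bit emit_mov. A sketch of the emitted sequence, assuming the Size::S64 XMM-to-memory mov lowers to a movq-style store and with registers chosen purely for illustration (used_xmms = [XMM3, XMM7]):

    sub  rsp, 16
    movq [rsp + 0], xmm3   ; i = 0
    movq [rsp + 8], xmm7   ; i = 1
    ...  call ...
    movq xmm3, [rsp + 0]
    movq xmm7, [rsp + 8]
    add  rsp, 16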
@@ -1155,7 +1269,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
                let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let ret = self.machine.acquire_locations(a, &[WpType::I32], false)[0];
                a.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX));
                a.emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX));
                a.emit_cdq();
                Self::emit_relaxed_xdiv(a, &mut self.machine, Assembler::emit_idiv, Size::S32, loc_b);
                a.emit_mov(Size::S32, Location::GPR(GPR::RAX), ret);
                self.value_stack.push((ret, LocalOrTemp::Temp));
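For the signed 32-bit division above, cdq sign-extends EAX into EDX:EAX immediately before the signed idiv (overwriting the zeroing xor of RDX just before it). Sketch of the lowering:

    mov  eax, loc_a
    cdq              ; EDX:EAX = sign_extend(EAX)
    idiv loc_b       ; quotient -> EAX, remainder -> EDX
    mov  ret, eax

The 64-bit variant below is identical with RAX/RDX and cqo.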
@@ -1232,7 +1346,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
                let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let ret = self.machine.acquire_locations(a, &[WpType::I64], false)[0];
                a.emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX));
                a.emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX));
                a.emit_cqo();
                Self::emit_relaxed_xdiv(a, &mut self.machine, Assembler::emit_idiv, Size::S64, loc_b);
                a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret);
                self.value_stack.push((ret, LocalOrTemp::Temp));
@@ -1308,6 +1422,45 @@ impl FunctionCodeGenerator for X64FunctionCode {
                    Size::S32, loc, ret,
                );
            }

            Operator::F32Const { value } => self.value_stack.push((Location::Imm32(value.bits()), LocalOrTemp::Temp)),
            Operator::F32Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddss),
            Operator::F32Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubss),
            Operator::F32Mul => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmulss),
            Operator::F32Div => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vdivss),
            Operator::F32Max => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmaxss),
            Operator::F32Min => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vminss),
            Operator::F32Eq => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpeqss),
            Operator::F32Ne => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpneqss),
            Operator::F32Lt => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpltss),
            Operator::F32Le => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpless),
            Operator::F32Gt => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpgtss),
            Operator::F32Ge => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpgess),
            Operator::F32Nearest => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_nearest),
            Operator::F32Floor => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_floor),
            Operator::F32Ceil => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_ceil),
            Operator::F32Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_trunc),
            Operator::F32Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtss),

            Operator::F64Const { value } => self.value_stack.push((Location::Imm64(value.bits()), LocalOrTemp::Temp)),
            Operator::F64Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddsd),
            Operator::F64Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubsd),
            Operator::F64Mul => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmulsd),
            Operator::F64Div => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vdivsd),
            Operator::F64Max => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmaxsd),
            Operator::F64Min => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vminsd),
            Operator::F64Eq => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpeqsd),
            Operator::F64Ne => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpneqsd),
            Operator::F64Lt => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpltsd),
            Operator::F64Le => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmplesd),
            Operator::F64Gt => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpgtsd),
            Operator::F64Ge => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcmpgesd),
            Operator::F64Nearest => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_nearest),
            Operator::F64Floor => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_floor),
            Operator::F64Ceil => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_ceil),
            Operator::F64Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_trunc),
            Operator::F64Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtsd),

            Operator::Call { function_index } => {
                let function_index = function_index as usize;
                let label = self
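Each floating-point operator thus reduces to one table entry plus a single AVX instruction. For F32Add with both operands already in XMM registers, the end-to-end effect is (register choice illustrative):

// value stack: [.., a@xmm0, b@xmm1]  ->  [.., result@xmm2]
//   vaddss xmm2, xmm0, xmm1
// With a memory, GPR, or immediate operand, emit_relaxed_avx first stages
// it into a temporary XMM register as shown earlier.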
@@ -69,6 +69,12 @@ pub enum Size {
    S64,
}

#[derive(Copy, Clone, Debug)]
pub enum XMMOrMemory {
    XMM(XMM),
    Memory(GPR, i32),
}

pub trait Emitter {
    type Label;
    type Offset;
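XMMOrMemory models the register-or-memory second source that VEX-encoded scalar instructions accept, letting the emitters fold a load into the arithmetic op instead of forcing every operand into a register first. The two forms, using vaddss as an example:

//   vaddss xmm_dst, xmm_src1, xmm_src2       <- XMMOrMemory::XMM
//   vaddss xmm_dst, xmm_src1, [base + disp]  <- XMMOrMemory::Memory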
@@ -81,6 +87,8 @@ pub trait Emitter {
    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_lea(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_lea_label(&mut self, label: Self::Label, dst: Location);
    fn emit_cdq(&mut self);
    fn emit_cqo(&mut self);
    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_jmp(&mut self, condition: Condition, label: Self::Label);
    fn emit_jmp_location(&mut self, loc: Location);
@@ -108,6 +116,49 @@ pub trait Emitter {
    fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location);
    fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location);

    fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
    fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);

    fn emit_ud2(&mut self);
    fn emit_ret(&mut self);
    fn emit_call_label(&mut self, label: Self::Label);
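All of these new methods share the signature fn(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); that uniformity is what lets emit_relaxed_avx and the fp binop/unop helpers earlier in the diff accept any of them as a plain fn pointer.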
@@ -306,6 +357,28 @@ macro_rules! trap_op {
        }
    }

macro_rules! avx_fn {
    ($ins:ident, $name:ident) => {
        fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) {
            match src2 {
                XMMOrMemory::XMM(x) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8))),
                XMMOrMemory::Memory(base, disp) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp]),
            }
        }
    }
}

macro_rules! avx_round_fn {
    ($ins:ident, $name:ident, $mode:expr) => {
        fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) {
            match src2 {
                XMMOrMemory::XMM(x) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode),
                XMMOrMemory::Memory(base, disp) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode),
            }
        }
    }
}

impl Emitter for Assembler {
    type Label = DynamicLabel;
    type Offset = AssemblyOffset;
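The $mode immediate forwarded by avx_round_fn above is the roundss/roundsd rounding-control byte: 0 = round to nearest even, 1 = round down (floor), 2 = round up (ceil), 3 = truncate toward zero, matching the four wrappers instantiated per width later in this diff. As a sketch, avx_round_fn!(vroundss, emit_vroundss_floor, 1) expands to roughly:

fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) {
    match src2 {
        XMMOrMemory::XMM(x) => dynasm!(self ; vroundss Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), 1),
        XMMOrMemory::Memory(base, disp) => dynasm!(self ; vroundss Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], 1),
    }
}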
@@ -386,6 +459,12 @@ impl Emitter for Assembler {
            _ => unreachable!(),
        }
    }
    fn emit_cdq(&mut self) {
        dynasm!(self ; cdq);
    }
    fn emit_cqo(&mut self) {
        dynasm!(self ; cqo);
    }
    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) {
        binop_all_nofp!(xor, self, sz, src, dst, {unreachable!()});
    }
@@ -582,6 +661,54 @@ impl Emitter for Assembler {
        }
    }

    avx_fn!(vaddss, emit_vaddss);
    avx_fn!(vaddsd, emit_vaddsd);

    avx_fn!(vsubss, emit_vsubss);
    avx_fn!(vsubsd, emit_vsubsd);

    avx_fn!(vmulss, emit_vmulss);
    avx_fn!(vmulsd, emit_vmulsd);

    avx_fn!(vdivss, emit_vdivss);
    avx_fn!(vdivsd, emit_vdivsd);

    avx_fn!(vmaxss, emit_vmaxss);
    avx_fn!(vmaxsd, emit_vmaxsd);

    avx_fn!(vminss, emit_vminss);
    avx_fn!(vminsd, emit_vminsd);

    avx_fn!(vcmpeqss, emit_vcmpeqss);
    avx_fn!(vcmpeqsd, emit_vcmpeqsd);

    avx_fn!(vcmpneqss, emit_vcmpneqss);
    avx_fn!(vcmpneqsd, emit_vcmpneqsd);

    avx_fn!(vcmpltss, emit_vcmpltss);
    avx_fn!(vcmpltsd, emit_vcmpltsd);

    avx_fn!(vcmpless, emit_vcmpless);
    avx_fn!(vcmplesd, emit_vcmplesd);

    avx_fn!(vcmpgtss, emit_vcmpgtss);
    avx_fn!(vcmpgtsd, emit_vcmpgtsd);

    avx_fn!(vcmpgess, emit_vcmpgess);
    avx_fn!(vcmpgesd, emit_vcmpgesd);

    avx_fn!(vsqrtss, emit_vsqrtss);
    avx_fn!(vsqrtsd, emit_vsqrtsd);

    avx_round_fn!(vroundss, emit_vroundss_nearest, 0);
    avx_round_fn!(vroundss, emit_vroundss_floor, 1);
    avx_round_fn!(vroundss, emit_vroundss_ceil, 2);
    avx_round_fn!(vroundss, emit_vroundss_trunc, 3);
    avx_round_fn!(vroundsd, emit_vroundsd_nearest, 0);
    avx_round_fn!(vroundsd, emit_vroundsd_floor, 1);
    avx_round_fn!(vroundsd, emit_vroundsd_ceil, 2);
    avx_round_fn!(vroundsd, emit_vroundsd_trunc, 3);

    fn emit_ud2(&mut self) {
        dynasm!(self ; ud2);
    }
@@ -186,9 +186,7 @@ impl Machine {
         for ty in tys {
             let loc = match *ty {
                 WpType::F32 | WpType::F64 => {
-                    self.pick_xmm().map(Location::XMM).or_else(
-                        || self.pick_gpr().map(Location::GPR)
-                    )
+                    self.pick_xmm().map(Location::XMM)
                 },
                 WpType::I32 | WpType::I64 => {
                     self.pick_gpr().map(Location::GPR)
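With the AVX emitters consuming XMM operands directly, an f32/f64 value can no longer live in a general-purpose register, so acquire_locations now draws F32/F64 locations from the XMM pool only, presumably falling through to the existing spill handling when the pool is exhausted, as the GPR path already does.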