Skip to content

Commit

Permalink
Refactor and turn on lowering for extend-add-pairwise
Browse files Browse the repository at this point in the history
  • Loading branch information
jlb6740 committed Jul 31, 2021
1 parent e373ddf commit e519fca
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 80 deletions.
24 changes: 8 additions & 16 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,8 @@ fn write_testsuite_tests(
let testname = extract_name(path);

writeln!(out, "#[test]")?;
if x64_should_panic(testsuite, &testname, strategy) {
writeln!(out, r#"#[should_panic]"#)?;
// Ignore when using QEMU for running tests (limited memory).
} else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
writeln!(out, "#[ignore]")?;
}

Expand All @@ -182,19 +180,6 @@ fn write_testsuite_tests(
Ok(())
}

/// For x64 backend features that are not supported yet, mark tests as panicking, so
/// they stop "passing" once the features are properly implemented.
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
if !platform_is_x64() || strategy != "Cranelift" {
return false;
}

match (testsuite, testname) {
_ => {}
}
false
}

/// Ignore tests that aren't supported yet.
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
match strategy {
Expand All @@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
// No simd support yet for s390x.
("simd", _) if platform_is_s390x() => return true,
// These are new instructions that are only known to be supported for x64.
("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
if !platform_is_x64() =>
{
return true
}
_ => {}
},
_ => panic!("unrecognized strategy"),
Expand Down
36 changes: 15 additions & 21 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4114,40 +4114,34 @@ pub(crate) fn define(
Inst::new(
"uwiden_high",
r#"
Lane-wise integer extended pairwise addition producing extended results
(twice wider results than the input)
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"extended_pairwise_add_signed",
r#"
Widen the high lanes of `x` using signed extension.
Widen the high lanes of `x` using unsigned extension.
This will double the lane width and halve the number of lanes.
This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let x = &Operand::new("x", I8or16or32xN);
let y = &Operand::new("y", I8or16or32xN);
let a = &Operand::new("a", I8or16or32xN);

ig.push(
Inst::new(
"extended_pairwise_add_unsigned",
"iadd_pairwise",
r#"
Widen the high lanes of `x` extending with zeros.
This will double the lane width and halve the number of lanes.
Does lane-wise integer pairwise addition on two operands, putting the
combined results into a single vector result. Here a pair refers to adjacent
lanes in a vector, i.e. i*2 + (i*2+1) for i == num_lanes/2. The first operand
pairwise add results will make up the low half of the resulting vector while
the second operand pairwise add results will make up the upper half of the
resulting vector.
"#,
&formats.unary,
&formats.binary,
)
.operands_in(vec![x])
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

Expand Down
8 changes: 3 additions & 5 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::ExtendedPairwiseAddSigned
| Opcode::ExtendedPairwiseAddUnsigned
| Opcode::ConstAddr
| Opcode::Vconcat
| Opcode::Vsplit => unimplemented!("lowering {}", op),
Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
unimplemented!("lowering {}", op)
}
}

Ok(())
Expand Down
3 changes: 1 addition & 2 deletions cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2869,8 +2869,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::SqmulRoundSat
| Opcode::FvpromoteLow
| Opcode::Fvdemote
| Opcode::ExtendedPairwiseAddSigned
| Opcode::ExtendedPairwiseAddUnsigned => {
| Opcode::IaddPairwise => {
// TODO
unimplemented!("Vector ops not implemented.");
}
Expand Down
97 changes: 67 additions & 30 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4927,18 +4927,33 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
}
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
// Extended pairwise addition instructions computes extended sums within adjacent
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
// Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380
/*
let input_ty = ctx.input_ty(insn, 0);
let output_ty = ctx.output_ty(insn, 0);
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
unreachable!();
match op {
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
Opcode::IaddPairwise => {
if let (Some(swiden_low), Some(swiden_high)) = (
matches_input(ctx, inputs[0], Opcode::SwidenLow),
matches_input(ctx, inputs[1], Opcode::SwidenHigh),
) {
let swiden_input = &[
InsnInput {
insn: swiden_low,
input: 0,
},
InsnInput {
insn: swiden_high,
input: 0,
},
];

let input_ty = ctx.input_ty(swiden_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let src0 = put_input_in_reg(ctx, swiden_input[0]);
let src1 = put_input_in_reg(ctx, swiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if src0 != src1 {
unimplemented!(
"iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
Expand All @@ -4949,7 +4964,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMem::reg(mul_const_reg.to_reg()),
dst,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
}
(types::I16X8, types::I32X4) => {
static MUL_CONST: [u8; 16] = [
Expand All @@ -4959,25 +4974,49 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddwd,
RegMem::reg(mul_const_reg.to_reg()),
dst,
));
}
_ => unreachable!(
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
input_ty, output_ty, op
),
},
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
_ => {
unimplemented!("Type not supported for {:?}", op);
}
}
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
matches_input(ctx, inputs[0], Opcode::UwidenLow),
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
) {
let uwiden_input = &[
InsnInput {
insn: uwiden_low,
input: 0,
},
InsnInput {
insn: uwiden_high,
input: 0,
},
];

let input_ty = ctx.input_ty(uwiden_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if src0 != src1 {
unimplemented!(
"iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddubsw,
RegMem::reg(mul_const_reg.to_reg()),
Expand All @@ -4997,7 +5036,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
pxor_const_reg,
types::I16X8,
));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::reg(pxor_const_reg.to_reg()),
Expand All @@ -5021,7 +5060,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMem::reg(madd_const_reg.to_reg()),
dst,
));
static ADDD_CONST2: [u8; 16] = [
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x01, 0x00,
Expand All @@ -5040,14 +5078,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst,
));
}
_ => unreachable!(
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
input_ty, output_ty, op
),
},
_ => unreachable!("{:?} not supported.", op),
_ => {
unimplemented!("Type not supported for {:?}", op);
}
}
} else {
unimplemented!("Operands not supported for {:?}", op);
}
*/
}
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
let input_ty = ctx.input_ty(insn, 0);
Expand Down
Binary file modified cranelift/codegen/src/preopt.serialized
Binary file not shown.
3 changes: 1 addition & 2 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,7 @@ where
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
Opcode::IaddPairwise => unimplemented!("IaddPairwise"),

// TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
Expand Down
16 changes: 12 additions & 4 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1881,19 +1881,27 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
}
Operator::I16x8ExtAddPairwiseI8x16S => {
let a = pop1_with_bitcast(state, I8X16, builder);
state.push1(builder.ins().extended_pairwise_add_signed(a))
let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I32x4ExtAddPairwiseI16x8S => {
let a = pop1_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().extended_pairwise_add_signed(a))
let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I16x8ExtAddPairwiseI8x16U => {
let a = pop1_with_bitcast(state, I8X16, builder);
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I32x4ExtAddPairwiseI16x8U => {
let a = pop1_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::F32x4Ceil | Operator::F64x2Ceil => {
// This is something of a misuse of `type_of`, because that produces the return type
Expand Down

0 comments on commit e519fca

Please sign in to comment.