Skip to content

Commit

Permalink
chore: rename vw to vTw
Browse files (browse the repository at this point in the history)
  • Loading branch information
sp301415 committed Feb 18, 2025
1 parent dc1b6e8 commit e72fe6c
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 325 deletions.
150 changes: 75 additions & 75 deletions internal/asmgen/fft.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,23 +39,23 @@ func fftInPlaceAVX2() {
VMOVUPD(Mem{Base: coeffs, Index: jt, Scale: 8}, vReal)
VMOVUPD(Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32}, vImag)

vwReal := YMM()
VMULPD(wReal, vReal, vwReal)
VFNMADD231PD(wImag, vImag, vwReal)
vTwReal := YMM()
VMULPD(wReal, vReal, vTwReal)
VFNMADD231PD(wImag, vImag, vTwReal)

vwImag := YMM()
VMULPD(wImag, vReal, vwImag)
VFMADD231PD(wReal, vImag, vwImag)
vTwImag := YMM()
VMULPD(wImag, vReal, vTwImag)
VFMADD231PD(wReal, vImag, vTwImag)

uOutReal := YMM()
VADDPD(vwReal, uReal, uOutReal)
VADDPD(vTwReal, uReal, uOutReal)
uOutImag := YMM()
VADDPD(vwImag, uImag, uOutImag)
VADDPD(vTwImag, uImag, uOutImag)

vOutReal := YMM()
VSUBPD(vwReal, uReal, vOutReal)
VSUBPD(vTwReal, uReal, vOutReal)
vOutImag := YMM()
VSUBPD(vwImag, uImag, vOutImag)
VSUBPD(vTwImag, uImag, vOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(uOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
Expand Down Expand Up @@ -113,23 +113,23 @@ func fftInPlaceAVX2() {
VMOVUPD(Mem{Base: coeffs, Index: jt, Scale: 8}, vReal)
VMOVUPD(Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32}, vImag)

vwReal = YMM()
VMULPD(wReal, vReal, vwReal)
VFNMADD231PD(wImag, vImag, vwReal)
vTwReal = YMM()
VMULPD(wReal, vReal, vTwReal)
VFNMADD231PD(wImag, vImag, vTwReal)

vwImag = YMM()
VMULPD(wImag, vReal, vwImag)
VFMADD231PD(wReal, vImag, vwImag)
vTwImag = YMM()
VMULPD(wImag, vReal, vTwImag)
VFMADD231PD(wReal, vImag, vTwImag)

uOutReal = YMM()
VADDPD(vwReal, uReal, uOutReal)
VADDPD(vTwReal, uReal, uOutReal)
uOutImag = YMM()
VADDPD(vwImag, uImag, uOutImag)
VADDPD(vTwImag, uImag, uOutImag)

vOutReal = YMM()
VSUBPD(vwReal, uReal, vOutReal)
VSUBPD(vTwReal, uReal, vOutReal)
vOutImag = YMM()
VSUBPD(vwImag, uImag, vOutImag)
VSUBPD(vTwImag, uImag, vOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(uOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
Expand Down Expand Up @@ -173,21 +173,21 @@ func fftInPlaceAVX2() {
VMOVUPD(Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32}, uImag)
VMOVUPD(Mem{Base: coeffs, Index: j, Scale: 8, Disp: 48}, vImag)

vwReal = XMM()
VMULPD(wReal, vReal, vwReal)
VFNMADD231PD(wImag, vImag, vwReal)
vTwReal = XMM()
VMULPD(wReal, vReal, vTwReal)
VFNMADD231PD(wImag, vImag, vTwReal)

vwImag = XMM()
VMULPD(wImag, vReal, vwImag)
VFMADD231PD(wReal, vImag, vwImag)
vTwImag = XMM()
VMULPD(wImag, vReal, vTwImag)
VFMADD231PD(wReal, vImag, vTwImag)

uOutReal, vOutReal = XMM(), XMM()
VADDPD(vwReal, uReal, uOutReal)
VSUBPD(vwReal, uReal, vOutReal)
VADDPD(vTwReal, uReal, uOutReal)
VSUBPD(vTwReal, uReal, vOutReal)

uOutImag, vOutImag = XMM(), XMM()
VADDPD(vwImag, uImag, uOutImag)
VSUBPD(vwImag, uImag, vOutImag)
VADDPD(vTwImag, uImag, uOutImag)
VSUBPD(vTwImag, uImag, vOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(vOutReal, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 16})
Expand Down Expand Up @@ -227,29 +227,29 @@ func fftInPlaceAVX2() {
VSHUFPD(Imm(0b1111), uRealvReal, uRealvReal, vReal)
VSHUFPD(Imm(0b1111), uImagvImag, uImagvImag, vImag)

// vwRealImag: (vwReal0, vwImag0, vwReal1, vwImag1)
vwRealImag := YMM()
VMULPD(wImagReal, vImag, vwRealImag)
VFMADDSUB231PD(wRealImag, vReal, vwRealImag)
// vTwRealImag: (vTwReal0, vTwImag0, vTwReal1, vTwImag1)
vTwRealImag := YMM()
VMULPD(wImagReal, vImag, vTwRealImag)
VFMADDSUB231PD(wRealImag, vReal, vTwRealImag)

// uReal: (uReal0, uReal0, uReal1, uReal1)
// uImag: (uImag0, uImag0, uImag1, uImag1)
uReal, uImag = YMM(), YMM()
VSHUFPD(Imm(0b0000), uRealvReal, uRealvReal, uReal)
VSHUFPD(Imm(0b0000), uImagvImag, uImagvImag, uImag)

// vwReal: (vwReal0, vwReal0, vwReal1, vwReal1)
// vwImag: (vwImag0, vwImag0, vwImag1, vwImag1)
vwReal, vwImag = YMM(), YMM()
VSHUFPD(Imm(0b0000), vwRealImag, vwRealImag, vwReal)
VSHUFPD(Imm(0b1111), vwRealImag, vwRealImag, vwImag)
// vTwReal: (vTwReal0, vTwReal0, vTwReal1, vTwReal1)
// vTwImag: (vTwImag0, vTwImag0, vTwImag1, vTwImag1)
vTwReal, vTwImag = YMM(), YMM()
VSHUFPD(Imm(0b0000), vTwRealImag, vTwRealImag, vTwReal)
VSHUFPD(Imm(0b1111), vTwRealImag, vTwRealImag, vTwImag)

VSUBPD(vwReal, zero, vwReal)
VSUBPD(vwImag, zero, vwImag)
VSUBPD(vTwReal, zero, vTwReal)
VSUBPD(vTwImag, zero, vTwImag)

uOut, vOut := YMM(), YMM()
VADDSUBPD(vwReal, uReal, uOut)
VADDSUBPD(vwImag, uImag, vOut)
VADDSUBPD(vTwReal, uReal, uOut)
VADDSUBPD(vTwImag, uImag, vOut)

VMOVUPD(uOut, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(vOut, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
Expand Down Expand Up @@ -314,14 +314,14 @@ func ifftInPlaceAVX2() {
VSHUFPD(Imm(0b0000), vOutRealImag, vOutRealImag, vOutReal)
VSHUFPD(Imm(0b1111), vOutRealImag, vOutRealImag, vOutImag)

// vwOutRealImag: (vwOutReal0, vwOutImag0, vwOutReal1, vwOutImag1)
vwOutRealImag := YMM()
VMULPD(wImagReal, vOutImag, vwOutRealImag)
VFMADDSUB231PD(wRealImag, vOutReal, vwOutRealImag)
// vTwOutRealImag: (vTwOutReal0, vTwOutImag0, vTwOutReal1, vTwOutImag1)
vTwOutRealImag := YMM()
VMULPD(wImagReal, vOutImag, vTwOutRealImag)
VFMADDSUB231PD(wRealImag, vOutReal, vTwOutRealImag)

uOut, vOut := YMM(), YMM()
VSHUFPD(Imm(0b0000), vwOutRealImag, uOutReal, uOut)
VSHUFPD(Imm(0b1111), vwOutRealImag, uOutImag, vOut)
VSHUFPD(Imm(0b0000), vTwOutRealImag, uOutReal, uOut)
VSHUFPD(Imm(0b1111), vTwOutRealImag, uOutImag, vOut)

VMOVUPD(uOut, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(vOut, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
Expand Down Expand Up @@ -358,18 +358,18 @@ func ifftInPlaceAVX2() {
VSUBPD(vReal, uReal, vOutReal)
VSUBPD(vImag, uImag, vOutImag)

vwOutReal := XMM()
VMULPD(wReal, vOutReal, vwOutReal)
VFNMADD231PD(wImag, vOutImag, vwOutReal)
vTwOutReal := XMM()
VMULPD(wReal, vOutReal, vTwOutReal)
VFNMADD231PD(wImag, vOutImag, vTwOutReal)

vwOutImag := XMM()
VMULPD(wImag, vOutReal, vwOutImag)
VFMADD231PD(wReal, vOutImag, vwOutImag)
vTwOutImag := XMM()
VMULPD(wImag, vOutReal, vTwOutImag)
VFMADD231PD(wReal, vOutImag, vTwOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(vwOutReal, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 16})
VMOVUPD(vTwOutReal, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 16})
VMOVUPD(uOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
VMOVUPD(vwOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 48})
VMOVUPD(vTwOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 48})

ADDQ(Imm(8), j)

Expand Down Expand Up @@ -425,18 +425,18 @@ func ifftInPlaceAVX2() {
VSUBPD(vReal, uReal, vOutReal)
VSUBPD(vImag, uImag, vOutImag)

vwOutReal = YMM()
VMULPD(wReal, vOutReal, vwOutReal)
VFNMADD231PD(wImag, vOutImag, vwOutReal)
vTwOutReal = YMM()
VMULPD(wReal, vOutReal, vTwOutReal)
VFNMADD231PD(wImag, vOutImag, vTwOutReal)

vwOutImag = YMM()
VMULPD(wImag, vOutReal, vwOutImag)
VFMADD231PD(wReal, vOutImag, vwOutImag)
vTwOutImag = YMM()
VMULPD(wImag, vOutReal, vTwOutImag)
VFMADD231PD(wReal, vOutImag, vTwOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(uOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
VMOVUPD(vwOutReal, Mem{Base: coeffs, Index: jt, Scale: 8})
VMOVUPD(vwOutImag, Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32})
VMOVUPD(vTwOutReal, Mem{Base: coeffs, Index: jt, Scale: 8})
VMOVUPD(vTwOutImag, Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32})

ADDQ(Imm(8), j)
ADDQ(Imm(8), jt)
Expand Down Expand Up @@ -490,23 +490,23 @@ func ifftInPlaceAVX2() {
VSUBPD(vReal, uReal, vOutReal)
VSUBPD(vImag, uImag, vOutImag)

vwOutReal = YMM()
VMULPD(wReal, vOutReal, vwOutReal)
VFNMADD231PD(wImag, vOutImag, vwOutReal)
vTwOutReal = YMM()
VMULPD(wReal, vOutReal, vTwOutReal)
VFNMADD231PD(wImag, vOutImag, vTwOutReal)

vwOutImag = YMM()
VMULPD(wImag, vOutReal, vwOutImag)
VFMADD231PD(wReal, vOutImag, vwOutImag)
vTwOutImag = YMM()
VMULPD(wImag, vOutReal, vTwOutImag)
VFMADD231PD(wReal, vOutImag, vTwOutImag)

VMULPD(scale, uOutReal, uOutReal)
VMULPD(scale, uOutImag, uOutImag)
VMULPD(scale, vwOutReal, vwOutReal)
VMULPD(scale, vwOutImag, vwOutImag)
VMULPD(scale, vTwOutReal, vTwOutReal)
VMULPD(scale, vTwOutImag, vTwOutImag)

VMOVUPD(uOutReal, Mem{Base: coeffs, Index: j, Scale: 8})
VMOVUPD(uOutImag, Mem{Base: coeffs, Index: j, Scale: 8, Disp: 32})
VMOVUPD(vwOutReal, Mem{Base: coeffs, Index: jt, Scale: 8})
VMOVUPD(vwOutImag, Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32})
VMOVUPD(vTwOutReal, Mem{Base: coeffs, Index: jt, Scale: 8})
VMOVUPD(vTwOutImag, Mem{Base: coeffs, Index: jt, Scale: 8, Disp: 32})

ADDQ(Imm(8), j)
ADDQ(Imm(8), jt)
Expand Down
Loading

0 comments on commit e72fe6c

Please sign in to comment.