Skip to content

Commit

Permalink
Add vfmul/vfsub
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 13, 2023
1 parent b9a54d4 commit 8572230
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 57 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@ TODO List:

### vfrstp.b/h

### vfsub.s/d

### vfmul.s/d

### vfmax.s/d

### vfmin.s/d
Expand Down
15 changes: 8 additions & 7 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,12 +763,13 @@

# Emit the element-wise C snippets for the LSX floating-point arithmetic
# instructions: vf{div,mul,sub,add}_{s,d}.h.
fp_element_bits = {"s": 32, "d": 64}
for width in ["s", "d"]:
    m = members_fp[width]
    # The lane count must follow the precision being generated. The previous
    # bound, `128 // w`, read a `w` that this loop never assigns, which gave
    # the single precision headers a bound of 2 instead of 4.
    lanes = 128 // fp_element_bits[width]
    for name, op in [("div", "/"), ("mul", "*"), ("sub", "-"), ("add", "+")]:
        with open(f"vf{name}_{width}.h", "w") as f:
            print(f"for (int i = 0;i < {lanes};i++) {{", file=f)
            print(
                f" dst.{m}[i] = a.{m}[i] {op} b.{m}[i];",
                file=f,
            )
            print("}", file=f)

# Normalize the formatting of every generated source/header in place.
os.system("clang-format -i *.cpp *.h")
3 changes: 3 additions & 0 deletions code/vfadd_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise add over the two fp64 lanes of a and b; the result goes to dst.
for (int i = 0; i < 2; i++) {
dst.fp64[i] = a.fp64[i] + b.fp64[i];
}
3 changes: 3 additions & 0 deletions code/vfadd_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise add over the four fp32 lanes of a and b; the result goes to dst.
for (int i = 0; i < 4; i++) {
  dst.fp32[i] = a.fp32[i] + b.fp32[i];
}
3 changes: 3 additions & 0 deletions code/vfmul_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise multiply over the two fp64 lanes of a and b; the result goes to dst.
for (int i = 0; i < 2; i++) {
dst.fp64[i] = a.fp64[i] * b.fp64[i];
}
3 changes: 3 additions & 0 deletions code/vfmul_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise multiply over the four fp32 lanes of a and b; the result goes to dst.
for (int i = 0; i < 4; i++) {
  dst.fp32[i] = a.fp32[i] * b.fp32[i];
}
3 changes: 3 additions & 0 deletions code/vfsub_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise subtract (a - b) over the two fp64 lanes; the result goes to dst.
for (int i = 0; i < 2; i++) {
dst.fp64[i] = a.fp64[i] - b.fp64[i];
}
3 changes: 3 additions & 0 deletions code/vfsub_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Element-wise subtract (a - b) over the four fp32 lanes; the result goes to dst.
for (int i = 0; i < 4; i++) {
  dst.fp32[i] = a.fp32[i] - b.fp32[i];
}
53 changes: 8 additions & 45 deletions docs/lsx/float_computation.md
Original file line number Diff line number Diff line change
@@ -1,50 +1,13 @@
# Floating Point Computation

## __m128d __lsx_vfadd_d (__m128d a, __m128d b)

### Synopsis

```c++
__m128d __lsx_vfadd_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vfadd.d vr, vr, vr
CPU Flags: LSX
```
### Description
Add double precision floating point elements in `a` to `b` and store the result in `dst`.
### Operation
```c++
for (int i = 0;i < 2;i++) {
dst.fp64[i] = a.fp64[i] + b.fp64[i];
}
```

## __m128 __lsx_vfadd_s (__m128 a, __m128 b)

### Synopsis

```c++
__m128 __lsx_vfadd_s (__m128 a, __m128 b)
#include <lsxintrin.h>
Instruction: vfadd.s vr, vr, vr
CPU Flags: LSX
```
### Description
Add single precision floating point elements in `a` to `b` and store the result in `dst`.
### Operation
```c++
for (int i = 0;i < 4;i++) {
dst.fp32[i] = a.fp32[i] + b.fp32[i];
}
```
{{ vfadd('s') }}
{{ vfadd('d') }}

{{ vfdiv('s') }}
{{ vfdiv('d') }}

{{ vfmul('s') }}
{{ vfmul('d') }}

{{ vfsub('s') }}
{{ vfsub('d') }}
32 changes: 31 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,44 @@ def vfcmp(cond):
"""
)

@env.macro
def vfmul(name):
    """Build the documentation entry for the `vfmul.<name>` intrinsic.

    `name` is the precision suffix (the docs invoke this macro with 's' and
    'd'). It keys both the precision word used in the description and the C
    vector type used in the signature.
    """
    precision = precisions[name]
    vec = fp_types[name]
    signature = f"{vec} __lsx_vfmul_{name} ({vec} a, {vec} b)"
    mnemonic = f"vfmul.{name} vr, vr, vr"
    summary = f"Multiply {precision} precision floating point elements in `a` and elements in `b`."
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

@env.macro
def vfdiv(name):
    """Build the documentation entry for the `vfdiv.<name>` intrinsic.

    `name` is the precision suffix (the docs invoke this macro with 's' and
    'd'). It keys both the precision word used in the description and the C
    vector type used in the signature.
    """
    precision = precisions[name]
    fp_type = fp_types[name]
    return instruction(
        intrinsic=f"{fp_type} __lsx_vfdiv_{name} ({fp_type} a, {fp_type} b)",
        instr=f"vfdiv.{name} vr, vr, vr",
        # The description names the precision, not the C vector type; `desc`
        # must be passed exactly once.
        desc=f"Divide {precision} precision floating point elements in `a` by elements in `b`.",
    )

@env.macro
def vfadd(name):
    """Build the documentation entry for the `vfadd.<name>` intrinsic.

    `name` is the precision suffix (the docs invoke this macro with 's' and
    'd'). It keys both the precision word used in the description and the C
    vector type used in the signature.
    """
    precision = precisions[name]
    vec = fp_types[name]
    signature = f"{vec} __lsx_vfadd_{name} ({vec} a, {vec} b)"
    mnemonic = f"vfadd.{name} vr, vr, vr"
    summary = f"Add {precision} precision floating point elements in `a` to elements in `b`."
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

@env.macro
def vfsub(name):
    """Build the documentation entry for the `vfsub.<name>` intrinsic.

    `name` is the precision suffix (the docs invoke this macro with 's' and
    'd'). It keys both the precision word used in the description and the C
    vector type used in the signature.
    """
    precision = precisions[name]
    fp_type = fp_types[name]
    return instruction(
        intrinsic=f"{fp_type} __lsx_vfsub_{name} ({fp_type} a, {fp_type} b)",
        instr=f"vfsub.{name} vr, vr, vr",
        # Worded so the operand order is unambiguous: the operation is a - b.
        desc=f"Subtract {precision} precision floating point elements in `b` from elements in `a`.",
    )

@env.macro
Expand Down

0 comments on commit 8572230

Please sign in to comment.