Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix: mismatch parsed and output length on amd64 #2

Merged
merged 7 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: test

on:
push:
branches: [ main ]
branches: [main]
pull_request:
branches: [ main ]
branches: [main]

jobs:
example:
Expand Down Expand Up @@ -43,3 +43,8 @@ jobs:
for file in tests/*.c; do
goat $file -O3 -mavx -mfma
done
- name: Run amd64-only tests
run: |
for file in tests/amd64/*.c; do
goat $file -O3 -mavx -mfma
done
23 changes: 21 additions & 2 deletions parser_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,22 @@ func parseAssembly(path string) (map[string][]Line, error) {
} else if nameLine.MatchString(line) {
functionName = strings.Split(line, ":")[0]
functions[functionName] = make([]Line, 0)
labelName = ""
} else if labelLine.MatchString(line) {
labelName = strings.Split(line, ":")[0]
labelName = labelName[1:]
functions[functionName] = append(functions[functionName], Line{Label: labelName})
} else if codeLine.MatchString(line) {
asm := strings.Split(line, "#")[0]
asm = strings.TrimSpace(asm)
asm := sanitizeAsm(line)
if labelName == "" {
functions[functionName] = append(functions[functionName], Line{Assembly: asm})
} else {
lines := functions[functionName]
if len(lines) == 0 {
functions[functionName] = append(functions[functionName], Line{Label: labelName})
lines = functions[functionName]
}

lines[len(lines)-1].Assembly = asm
labelName = ""
}
Expand All @@ -134,6 +139,14 @@ func parseAssembly(path string) (map[string][]Line, error) {
return functions, nil
}

// sanitizeAsm reduces one line of assembly text to its instruction part.
//
// Everything from the first "//" onward is dropped, and leading and trailing
// whitespace is removed from what remains. A line containing no "//" is
// returned trimmed but otherwise unchanged. Only "//" is treated as a comment
// marker here.
func sanitizeAsm(asm string) string {
	text := strings.TrimSpace(asm)
	if cut := strings.Index(text, "//"); cut >= 0 {
		// Keep only the part that precedes the comment marker.
		text = text[:cut]
	}
	return strings.TrimSpace(text)
}

func parseObjectDump(dump string, functions map[string][]Line) error {
var (
functionName string
Expand Down Expand Up @@ -161,6 +174,12 @@ func parseObjectDump(dump string, functions map[string][]Line) error {
}
binary = append(binary, s)
}

assembly = sanitizeAsm(assembly)
if strings.Contains(assembly, "nop") {
continue
}

if assembly == "" {
return fmt.Errorf("try to increase --insn-width of objdump")
} else if strings.HasPrefix(assembly, "nop") ||
Expand Down
4 changes: 3 additions & 1 deletion parser_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ func parseAssembly(path string) (map[string][]Line, error) {
functions[functionName] = append(functions[functionName], Line{Assembly: asm})
} else {
lines := functions[functionName]
lines[len(lines)-1].Assembly = asm
if len(lines) > 0 {
lines[len(lines)-1].Assembly = asm
}
labelName = ""
}
}
Expand Down
39 changes: 39 additions & 0 deletions tests/amd64/empty_line.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <immintrin.h>

/*
 * l2 - accumulate squared differences of a and b with AVX/FMA and store the
 * horizontally reduced result in *res.
 *
 * a, b : input float arrays, read with unaligned 8-float loads.
 * res  : output; receives the scalar sum.
 * len  : pointer to the iteration count (truncated to int for the loop).
 *
 * NOTE(review): this file lives under tests/amd64 and appears to be a
 * code-generation fixture rather than a reference implementation -- the exact
 * statement shape presumably matters for the assembly it produces; confirm
 * before "fixing" anything noted below.
 */
void l2(float *a, float *b, float *res, long *len)
{
int n = *len;
float sum = 0;

/* Four accumulators are declared, but only acc[0] and acc[1] are zeroed. */
__m256 acc[4];
acc[0] = _mm256_setzero_ps();
acc[1] = _mm256_setzero_ps();

while (n)
{
/* Unaligned loads of 8 floats starting at the current a / b. */
__m256 a_vec0 = _mm256_loadu_ps(a);
__m256 b_vec0 = _mm256_loadu_ps(b);

__m256 diff0 = _mm256_sub_ps(a_vec0, b_vec0);

/* acc[0] += diff0 * diff0 (fused multiply-add). */
acc[0] = _mm256_fmadd_ps(diff0, diff0, acc[0]);

/*
 * NOTE(review): each iteration loads 8 floats but advances the pointers
 * and the counter by one element only, so consecutive loads overlap and
 * the last iterations read past a[*len - 1] -- verify this is intended.
 */
n--;
a++;
b++;
}

acc[0] = _mm256_add_ps(acc[1], acc[0]);
if (*len >= 32)
{
/*
 * NOTE(review): acc[2] and acc[3] are never written before being read
 * here; presumably deliberate to provoke a particular compiler output --
 * confirm.
 */
acc[2] = _mm256_add_ps(acc[3], acc[2]);
acc[0] = _mm256_add_ps(acc[2], acc[0]);
}

/*
 * Horizontal reduction: two pairwise adds within each 128-bit lane, then the
 * upper lane is added to the lower lane and the lowest float is extracted.
 */
__m256 t1 = _mm256_hadd_ps(acc[0], acc[0]);
__m256 t2 = _mm256_hadd_ps(t1, t1);
__m128 t3 = _mm256_extractf128_ps(t2, 1);
__m128 t4 = _mm_add_ps(_mm256_castps256_ps128(t2), t3);
sum += _mm_cvtss_f32(t4);
*res = sum;
}
Empty file added tests/arm64/.gitkeep
Empty file.
Loading