Skip to content

Commit

Permalink
Simple num_stages fix without re-tuning for performance
Browse files Browse the repository at this point in the history
  • Loading branch information
gshtras committed Dec 19, 2024
1 parent ae77a01 commit c1ccc5e
Show file tree
Hide file tree
Showing 43 changed files with 789 additions and 789 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 1
Expand All @@ -16,7 +16,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -27,7 +27,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -38,7 +38,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 1,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -49,7 +49,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -60,7 +60,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 1,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -71,7 +71,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 1
Expand All @@ -82,7 +82,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -93,7 +93,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -104,7 +104,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -115,7 +115,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -126,7 +126,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 1
Expand All @@ -137,7 +137,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -148,7 +148,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 32,
"kpack": 2
Expand All @@ -159,7 +159,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -170,7 +170,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 2
Expand All @@ -181,7 +181,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 1
Expand All @@ -192,7 +192,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0,
"matrix_instr_nonkdim": 16,
"kpack": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"2": {
Expand All @@ -14,7 +14,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"4": {
Expand All @@ -23,7 +23,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"8": {
Expand All @@ -32,7 +32,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"16": {
Expand All @@ -41,7 +41,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"24": {
Expand All @@ -50,7 +50,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"32": {
Expand All @@ -59,7 +59,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"48": {
Expand All @@ -68,7 +68,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"64": {
Expand All @@ -77,7 +77,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"num_warps": 1,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"96": {
Expand All @@ -86,7 +86,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"num_warps": 2,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"128": {
Expand All @@ -95,7 +95,7 @@
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"256": {
Expand All @@ -104,7 +104,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"512": {
Expand All @@ -113,7 +113,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"1024": {
Expand All @@ -122,7 +122,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"1536": {
Expand All @@ -131,7 +131,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"2048": {
Expand All @@ -140,7 +140,7 @@
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"3072": {
Expand All @@ -149,7 +149,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
},
"4096": {
Expand All @@ -158,7 +158,7 @@
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 8,
- "num_stages": 0,
+ "num_stages": 2,
"waves_per_eu": 0
}
}
Loading

0 comments on commit c1ccc5e

Please sign in to comment.