File tree Expand file tree Collapse file tree 13 files changed +508
-11
lines changed
common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200
models/qwen3next/triton_kernel/fla
mode_backend/chunked_prefill Expand file tree Collapse file tree 13 files changed +508
-11
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLK_HEADS" : 64 ,
4+ "num_warps" : 2
5+ },
6+ "100" : {
7+ "BLK_HEADS" : 16 ,
8+ "num_warps" : 2
9+ },
10+ "1024" : {
11+ "BLK_HEADS" : 8 ,
12+ "num_warps" : 2
13+ },
14+ "128" : {
15+ "BLK_HEADS" : 64 ,
16+ "num_warps" : 2
17+ },
18+ "16" : {
19+ "BLK_HEADS" : 16 ,
20+ "num_warps" : 1
21+ },
22+ "256" : {
23+ "BLK_HEADS" : 16 ,
24+ "num_warps" : 2
25+ },
26+ "32" : {
27+ "BLK_HEADS" : 16 ,
28+ "num_warps" : 1
29+ },
30+ "64" : {
31+ "BLK_HEADS" : 8 ,
32+ "num_warps" : 2
33+ },
34+ "8" : {
35+ "BLK_HEADS" : 64 ,
36+ "num_warps" : 4
37+ }
38+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_N" : 256 ,
4+ "num_warps" : 2
5+ },
6+ "128" : {
7+ "BLOCK_N" : 256 ,
8+ "num_warps" : 1
9+ },
10+ "2048" : {
11+ "BLOCK_N" : 64 ,
12+ "num_warps" : 1
13+ },
14+ "256" : {
15+ "BLOCK_N" : 256 ,
16+ "num_warps" : 1
17+ },
18+ "512" : {
19+ "BLOCK_N" : 512 ,
20+ "num_warps" : 4
21+ },
22+ "64" : {
23+ "BLOCK_N" : 256 ,
24+ "num_warps" : 1
25+ },
26+ "8" : {
27+ "BLOCK_N" : 512 ,
28+ "num_warps" : 8
29+ },
30+ "800" : {
31+ "BLOCK_N" : 64 ,
32+ "num_warps" : 1
33+ },
34+ "8192" : {
35+ "BLOCK_N" : 128 ,
36+ "num_warps" : 2
37+ }
38+ }
Original file line number Diff line number Diff line change 1+ {
2+ "2048" : {
3+ "BLOCK_SIZE" : 2048 ,
4+ "num_stages" : 4 ,
5+ "num_warps" : 4
6+ }
7+ }
Original file line number Diff line number Diff line change 1+ {
2+ "256" : {
3+ "BLOCK_SIZE" : 256 ,
4+ "num_stages" : 1 ,
5+ "num_warps" : 1
6+ }
7+ }
Original file line number Diff line number Diff line change 1+ {
2+ "10" : {
3+ "BLOCK_SIZE_K" : 32 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 128 ,
6+ "GROUP_SIZE_M" : 64 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 2 ,
9+ "num_warps" : 4
10+ },
11+ "1000" : {
12+ "BLOCK_SIZE_K" : 128 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 128 ,
15+ "GROUP_SIZE_M" : 64 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 2 ,
18+ "num_warps" : 4
19+ },
20+ "10240" : {
21+ "BLOCK_SIZE_K" : 32 ,
22+ "BLOCK_SIZE_M" : 64 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 64 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 3 ,
27+ "num_warps" : 4
28+ },
29+ "1280" : {
30+ "BLOCK_SIZE_K" : 128 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 128 ,
33+ "GROUP_SIZE_M" : 64 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 2 ,
36+ "num_warps" : 4
37+ },
38+ "160" : {
39+ "BLOCK_SIZE_K" : 128 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 128 ,
42+ "GROUP_SIZE_M" : 64 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 2 ,
45+ "num_warps" : 4
46+ },
47+ "2560" : {
48+ "BLOCK_SIZE_K" : 64 ,
49+ "BLOCK_SIZE_M" : 16 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 32 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 3 ,
54+ "num_warps" : 4
55+ },
56+ "320" : {
57+ "BLOCK_SIZE_K" : 32 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 128 ,
60+ "GROUP_SIZE_M" : 32 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 3 ,
63+ "num_warps" : 4
64+ },
65+ "640" : {
66+ "BLOCK_SIZE_K" : 128 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 64 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 2 ,
72+ "num_warps" : 4
73+ },
74+ "80" : {
75+ "BLOCK_SIZE_K" : 128 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 128 ,
78+ "GROUP_SIZE_M" : 64 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 2 ,
81+ "num_warps" : 4
82+ }
83+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 32 ,
6+ "GROUP_SIZE_M" : 1 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 4 ,
9+ "num_warps" : 4
10+ },
11+ "100" : {
12+ "BLOCK_SIZE_K" : 128 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 64 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 3 ,
18+ "num_warps" : 4
19+ },
20+ "1024" : {
21+ "BLOCK_SIZE_K" : 64 ,
22+ "BLOCK_SIZE_M" : 64 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 32 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 3 ,
27+ "num_warps" : 4
28+ },
29+ "128" : {
30+ "BLOCK_SIZE_K" : 128 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 128 ,
33+ "GROUP_SIZE_M" : 16 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 3 ,
36+ "num_warps" : 4
37+ },
38+ "16" : {
39+ "BLOCK_SIZE_K" : 64 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 64 ,
42+ "GROUP_SIZE_M" : 1 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 3 ,
45+ "num_warps" : 4
46+ },
47+ "256" : {
48+ "BLOCK_SIZE_K" : 128 ,
49+ "BLOCK_SIZE_M" : 16 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 1 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 2 ,
54+ "num_warps" : 4
55+ },
56+ "32" : {
57+ "BLOCK_SIZE_K" : 128 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 64 ,
60+ "GROUP_SIZE_M" : 16 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 3 ,
63+ "num_warps" : 4
64+ },
65+ "64" : {
66+ "BLOCK_SIZE_K" : 128 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 64 ,
69+ "GROUP_SIZE_M" : 16 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 2 ,
72+ "num_warps" : 4
73+ },
74+ "8" : {
75+ "BLOCK_SIZE_K" : 64 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 32 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 5 ,
81+ "num_warps" : 4
82+ }
83+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE" : 128 ,
4+ "num_warps" : 4
5+ },
6+ "100" : {
7+ "BLOCK_SIZE" : 256 ,
8+ "num_warps" : 8
9+ },
10+ "1024" : {
11+ "BLOCK_SIZE" : 128 ,
12+ "num_warps" : 4
13+ },
14+ "128" : {
15+ "BLOCK_SIZE" : 128 ,
16+ "num_warps" : 4
17+ },
18+ "16" : {
19+ "BLOCK_SIZE" : 256 ,
20+ "num_warps" : 8
21+ },
22+ "256" : {
23+ "BLOCK_SIZE" : 128 ,
24+ "num_warps" : 4
25+ },
26+ "32" : {
27+ "BLOCK_SIZE" : 128 ,
28+ "num_warps" : 4
29+ },
30+ "64" : {
31+ "BLOCK_SIZE" : 128 ,
32+ "num_warps" : 8
33+ },
34+ "8" : {
35+ "BLOCK_SIZE" : 256 ,
36+ "num_warps" : 8
37+ }
38+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_DIM" : 512 ,
4+ "BLOCK_M" : 1 ,
5+ "NUM_STAGE" : 1 ,
6+ "num_warps" : 4
7+ },
8+ "100" : {
9+ "BLOCK_DIM" : 1024 ,
10+ "BLOCK_M" : 1 ,
11+ "NUM_STAGE" : 1 ,
12+ "num_warps" : 16
13+ },
14+ "1024" : {
15+ "BLOCK_DIM" : 512 ,
16+ "BLOCK_M" : 1 ,
17+ "NUM_STAGE" : 4 ,
18+ "num_warps" : 2
19+ },
20+ "128" : {
21+ "BLOCK_DIM" : 256 ,
22+ "BLOCK_M" : 1 ,
23+ "NUM_STAGE" : 1 ,
24+ "num_warps" : 4
25+ },
26+ "16" : {
27+ "BLOCK_DIM" : 512 ,
28+ "BLOCK_M" : 1 ,
29+ "NUM_STAGE" : 2 ,
30+ "num_warps" : 16
31+ },
32+ "256" : {
33+ "BLOCK_DIM" : 1024 ,
34+ "BLOCK_M" : 1 ,
35+ "NUM_STAGE" : 1 ,
36+ "num_warps" : 8
37+ },
38+ "32" : {
39+ "BLOCK_DIM" : 256 ,
40+ "BLOCK_M" : 1 ,
41+ "NUM_STAGE" : 1 ,
42+ "num_warps" : 8
43+ },
44+ "64" : {
45+ "BLOCK_DIM" : 512 ,
46+ "BLOCK_M" : 1 ,
47+ "NUM_STAGE" : 1 ,
48+ "num_warps" : 4
49+ },
50+ "8" : {
51+ "BLOCK_DIM" : 256 ,
52+ "BLOCK_M" : 1 ,
53+ "NUM_STAGE" : 1 ,
54+ "num_warps" : 8
55+ }
56+ }
You can’t perform that action at this time.
0 commit comments