Skip to content

Commit 433af6f

Browse files
committed
fix
1 parent 01f9073 commit 433af6f

13 files changed

+508
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"1": {
3+
"BLK_HEADS": 64,
4+
"num_warps": 2
5+
},
6+
"100": {
7+
"BLK_HEADS": 16,
8+
"num_warps": 2
9+
},
10+
"1024": {
11+
"BLK_HEADS": 8,
12+
"num_warps": 2
13+
},
14+
"128": {
15+
"BLK_HEADS": 64,
16+
"num_warps": 2
17+
},
18+
"16": {
19+
"BLK_HEADS": 16,
20+
"num_warps": 1
21+
},
22+
"256": {
23+
"BLK_HEADS": 16,
24+
"num_warps": 2
25+
},
26+
"32": {
27+
"BLK_HEADS": 16,
28+
"num_warps": 1
29+
},
30+
"64": {
31+
"BLK_HEADS": 8,
32+
"num_warps": 2
33+
},
34+
"8": {
35+
"BLK_HEADS": 64,
36+
"num_warps": 4
37+
}
38+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"1024": {
3+
"BLOCK_N": 256,
4+
"num_warps": 2
5+
},
6+
"128": {
7+
"BLOCK_N": 256,
8+
"num_warps": 1
9+
},
10+
"2048": {
11+
"BLOCK_N": 64,
12+
"num_warps": 1
13+
},
14+
"256": {
15+
"BLOCK_N": 256,
16+
"num_warps": 1
17+
},
18+
"512": {
19+
"BLOCK_N": 512,
20+
"num_warps": 4
21+
},
22+
"64": {
23+
"BLOCK_N": 256,
24+
"num_warps": 1
25+
},
26+
"8": {
27+
"BLOCK_N": 512,
28+
"num_warps": 8
29+
},
30+
"800": {
31+
"BLOCK_N": 64,
32+
"num_warps": 1
33+
},
34+
"8192": {
35+
"BLOCK_N": 128,
36+
"num_warps": 2
37+
}
38+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"2048": {
3+
"BLOCK_SIZE": 2048,
4+
"num_stages": 4,
5+
"num_warps": 4
6+
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"256": {
3+
"BLOCK_SIZE": 256,
4+
"num_stages": 1,
5+
"num_warps": 1
6+
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
{
2+
"10": {
3+
"BLOCK_SIZE_K": 32,
4+
"BLOCK_SIZE_M": 16,
5+
"BLOCK_SIZE_N": 128,
6+
"GROUP_SIZE_M": 64,
7+
"NEED_TRANS": false,
8+
"num_stages": 2,
9+
"num_warps": 4
10+
},
11+
"1000": {
12+
"BLOCK_SIZE_K": 128,
13+
"BLOCK_SIZE_M": 16,
14+
"BLOCK_SIZE_N": 128,
15+
"GROUP_SIZE_M": 64,
16+
"NEED_TRANS": false,
17+
"num_stages": 2,
18+
"num_warps": 4
19+
},
20+
"10240": {
21+
"BLOCK_SIZE_K": 32,
22+
"BLOCK_SIZE_M": 64,
23+
"BLOCK_SIZE_N": 128,
24+
"GROUP_SIZE_M": 64,
25+
"NEED_TRANS": false,
26+
"num_stages": 3,
27+
"num_warps": 4
28+
},
29+
"1280": {
30+
"BLOCK_SIZE_K": 128,
31+
"BLOCK_SIZE_M": 16,
32+
"BLOCK_SIZE_N": 128,
33+
"GROUP_SIZE_M": 64,
34+
"NEED_TRANS": false,
35+
"num_stages": 2,
36+
"num_warps": 4
37+
},
38+
"160": {
39+
"BLOCK_SIZE_K": 128,
40+
"BLOCK_SIZE_M": 16,
41+
"BLOCK_SIZE_N": 128,
42+
"GROUP_SIZE_M": 64,
43+
"NEED_TRANS": false,
44+
"num_stages": 2,
45+
"num_warps": 4
46+
},
47+
"2560": {
48+
"BLOCK_SIZE_K": 64,
49+
"BLOCK_SIZE_M": 16,
50+
"BLOCK_SIZE_N": 128,
51+
"GROUP_SIZE_M": 32,
52+
"NEED_TRANS": false,
53+
"num_stages": 3,
54+
"num_warps": 4
55+
},
56+
"320": {
57+
"BLOCK_SIZE_K": 32,
58+
"BLOCK_SIZE_M": 16,
59+
"BLOCK_SIZE_N": 128,
60+
"GROUP_SIZE_M": 32,
61+
"NEED_TRANS": false,
62+
"num_stages": 3,
63+
"num_warps": 4
64+
},
65+
"640": {
66+
"BLOCK_SIZE_K": 128,
67+
"BLOCK_SIZE_M": 16,
68+
"BLOCK_SIZE_N": 128,
69+
"GROUP_SIZE_M": 64,
70+
"NEED_TRANS": false,
71+
"num_stages": 2,
72+
"num_warps": 4
73+
},
74+
"80": {
75+
"BLOCK_SIZE_K": 128,
76+
"BLOCK_SIZE_M": 16,
77+
"BLOCK_SIZE_N": 128,
78+
"GROUP_SIZE_M": 64,
79+
"NEED_TRANS": false,
80+
"num_stages": 2,
81+
"num_warps": 4
82+
}
83+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_K": 128,
4+
"BLOCK_SIZE_M": 16,
5+
"BLOCK_SIZE_N": 32,
6+
"GROUP_SIZE_M": 1,
7+
"NEED_TRANS": false,
8+
"num_stages": 4,
9+
"num_warps": 4
10+
},
11+
"100": {
12+
"BLOCK_SIZE_K": 128,
13+
"BLOCK_SIZE_M": 16,
14+
"BLOCK_SIZE_N": 64,
15+
"GROUP_SIZE_M": 1,
16+
"NEED_TRANS": false,
17+
"num_stages": 3,
18+
"num_warps": 4
19+
},
20+
"1024": {
21+
"BLOCK_SIZE_K": 64,
22+
"BLOCK_SIZE_M": 64,
23+
"BLOCK_SIZE_N": 128,
24+
"GROUP_SIZE_M": 32,
25+
"NEED_TRANS": false,
26+
"num_stages": 3,
27+
"num_warps": 4
28+
},
29+
"128": {
30+
"BLOCK_SIZE_K": 128,
31+
"BLOCK_SIZE_M": 16,
32+
"BLOCK_SIZE_N": 128,
33+
"GROUP_SIZE_M": 16,
34+
"NEED_TRANS": false,
35+
"num_stages": 3,
36+
"num_warps": 4
37+
},
38+
"16": {
39+
"BLOCK_SIZE_K": 64,
40+
"BLOCK_SIZE_M": 16,
41+
"BLOCK_SIZE_N": 64,
42+
"GROUP_SIZE_M": 1,
43+
"NEED_TRANS": false,
44+
"num_stages": 3,
45+
"num_warps": 4
46+
},
47+
"256": {
48+
"BLOCK_SIZE_K": 128,
49+
"BLOCK_SIZE_M": 16,
50+
"BLOCK_SIZE_N": 128,
51+
"GROUP_SIZE_M": 1,
52+
"NEED_TRANS": false,
53+
"num_stages": 2,
54+
"num_warps": 4
55+
},
56+
"32": {
57+
"BLOCK_SIZE_K": 128,
58+
"BLOCK_SIZE_M": 16,
59+
"BLOCK_SIZE_N": 64,
60+
"GROUP_SIZE_M": 16,
61+
"NEED_TRANS": false,
62+
"num_stages": 3,
63+
"num_warps": 4
64+
},
65+
"64": {
66+
"BLOCK_SIZE_K": 128,
67+
"BLOCK_SIZE_M": 16,
68+
"BLOCK_SIZE_N": 64,
69+
"GROUP_SIZE_M": 16,
70+
"NEED_TRANS": false,
71+
"num_stages": 2,
72+
"num_warps": 4
73+
},
74+
"8": {
75+
"BLOCK_SIZE_K": 64,
76+
"BLOCK_SIZE_M": 16,
77+
"BLOCK_SIZE_N": 64,
78+
"GROUP_SIZE_M": 32,
79+
"NEED_TRANS": false,
80+
"num_stages": 5,
81+
"num_warps": 4
82+
}
83+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE": 128,
4+
"num_warps": 4
5+
},
6+
"100": {
7+
"BLOCK_SIZE": 256,
8+
"num_warps": 8
9+
},
10+
"1024": {
11+
"BLOCK_SIZE": 128,
12+
"num_warps": 4
13+
},
14+
"128": {
15+
"BLOCK_SIZE": 128,
16+
"num_warps": 4
17+
},
18+
"16": {
19+
"BLOCK_SIZE": 256,
20+
"num_warps": 8
21+
},
22+
"256": {
23+
"BLOCK_SIZE": 128,
24+
"num_warps": 4
25+
},
26+
"32": {
27+
"BLOCK_SIZE": 128,
28+
"num_warps": 4
29+
},
30+
"64": {
31+
"BLOCK_SIZE": 128,
32+
"num_warps": 8
33+
},
34+
"8": {
35+
"BLOCK_SIZE": 256,
36+
"num_warps": 8
37+
}
38+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"1": {
3+
"BLOCK_DIM": 512,
4+
"BLOCK_M": 1,
5+
"NUM_STAGE": 1,
6+
"num_warps": 4
7+
},
8+
"100": {
9+
"BLOCK_DIM": 1024,
10+
"BLOCK_M": 1,
11+
"NUM_STAGE": 1,
12+
"num_warps": 16
13+
},
14+
"1024": {
15+
"BLOCK_DIM": 512,
16+
"BLOCK_M": 1,
17+
"NUM_STAGE": 4,
18+
"num_warps": 2
19+
},
20+
"128": {
21+
"BLOCK_DIM": 256,
22+
"BLOCK_M": 1,
23+
"NUM_STAGE": 1,
24+
"num_warps": 4
25+
},
26+
"16": {
27+
"BLOCK_DIM": 512,
28+
"BLOCK_M": 1,
29+
"NUM_STAGE": 2,
30+
"num_warps": 16
31+
},
32+
"256": {
33+
"BLOCK_DIM": 1024,
34+
"BLOCK_M": 1,
35+
"NUM_STAGE": 1,
36+
"num_warps": 8
37+
},
38+
"32": {
39+
"BLOCK_DIM": 256,
40+
"BLOCK_M": 1,
41+
"NUM_STAGE": 1,
42+
"num_warps": 8
43+
},
44+
"64": {
45+
"BLOCK_DIM": 512,
46+
"BLOCK_M": 1,
47+
"NUM_STAGE": 1,
48+
"num_warps": 4
49+
},
50+
"8": {
51+
"BLOCK_DIM": 256,
52+
"BLOCK_M": 1,
53+
"NUM_STAGE": 1,
54+
"num_warps": 8
55+
}
56+
}

0 commit comments

Comments
 (0)