Commit 544230f

gemm x86 tile-N threading, drop testutil layer hook function (#6428)
* test threading
* drop testutil layer hook function
1 parent bee91ae commit 544230f

15 files changed: +485 −227 lines
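The testutil change is mechanical and identical across every touched test: per the commit title, the layer hook function is dropped from testutil, so each test_layer / test_layer_opt call site loses the positional argument that carried it (it was always passed as 0, just before the flag). A before/after sketch of the call-site change, taken from the BinaryOp test in this diff:

    // before: the 0 ahead of flag was the (unused) layer hook function argument
    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);

    // after: the hook parameter is removed from the testutil signature
    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);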

src/layer/x86/gemm_x86.cpp

Lines changed: 305 additions & 133 deletions
Large diffs are not rendered by default.
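The threading work itself lives in this file, whose diff is not rendered here. Per the commit title it adds tile-N threading: the GEMM output columns are split into fixed-width N tiles and the tiles are distributed across worker threads, so each thread writes a disjoint block of output columns and no synchronization is needed. A minimal, self-contained sketch of that partitioning idea (plain row-major C = A * B with OpenMP; the tile width, loop structure, and function name are illustrative assumptions, not the actual gemm_x86.cpp code, which packs tiles and uses SIMD kernels):

    #include <algorithm>

    // Illustrative tile-N threading: each OpenMP worker owns a contiguous
    // range of output columns [j0, j0 + jj) and computes all M rows for it.
    static void gemm_tile_n_threaded(const float* A, const float* B, float* C,
                                     int M, int N, int K, int tile_n)
    {
        const int nn_N = (N + tile_n - 1) / tile_n; // number of N tiles

        #pragma omp parallel for
        for (int ppj = 0; ppj < nn_N; ppj++)
        {
            const int j0 = ppj * tile_n;
            const int jj = std::min(tile_n, N - j0); // partial tile at the right edge

            for (int i = 0; i < M; i++)
            {
                for (int j = j0; j < j0 + jj; j++)
                {
                    float sum = 0.f;
                    for (int k = 0; k < K; k++)
                        sum += A[i * K + k] * B[k * N + j];

                    C[i * N + j] = sum;
                }
            }
        }
    }

The new tests/test_gemm_nt.cpp below runs the Gemm layer with TEST_LAYER_ENABLE_THREADING, so this threaded path is exercised against the reference implementation.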

tests/test_binaryop.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_1.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_2.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_3.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_convolution_3.cpp

Lines changed: 5 additions & 5 deletions
@@ -179,7 +179,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     }
 
     int flag = TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("Convolution", pd, weights, a, requant ? 1.0f : 0.001f, 0, flag);
+    int ret = test_layer("Convolution", pd, weights, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -200,7 +200,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_winograd23_convolution = true;
     opt.use_winograd43_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -219,7 +219,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -238,7 +238,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -257,7 +257,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);

tests/test_convolutiondepthwise_1.cpp

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int ke
     }
 
     int flag = TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("ConvolutionDepthWise", pd, weights, a, requant ? 1.0f : 0.001f, 0, flag);
+    int ret = test_layer("ConvolutionDepthWise", pd, weights, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolutiondepthwise_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, group, requant, activation_type, activation_params[0], activation_params[1]);

tests/test_dequantize.cpp

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ static int test_dequantize(const ncnn::Mat& a, int scale_data_size, int bias_dat
     weights[1] = RandomMat(bias_data_size);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
-    int ret = test_layer("Dequantize", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Dequantize", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_dequantize failed a.dims=%d a=(%d %d %d) scale_data_size=%d bias_data_size=%d\n", a.dims, a.w, a.h, a.c, scale_data_size, bias_data_size);
@@ -36,7 +36,7 @@ static int test_dequantize_pack8(const ncnn::Mat& a, int scale_data_size, int bi
     weights[1] = RandomMat(bias_data_size);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_ENABLE_FORCE_INPUT_PACK8;
-    int ret = test_layer("Dequantize", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Dequantize", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_dequantize_pack8 failed a.dims=%d a=(%d %d %d) scale_data_size=%d bias_data_size=%d\n", a.dims, a.w, a.h, a.c, scale_data_size, bias_data_size);

tests/test_flatten.cpp

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ static int test_flatten_int8(const ncnn::Mat& a)
     std::vector<ncnn::Mat> weights(0);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("Flatten", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Flatten", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_flatten_int8 failed a.dims=%d a=(%d %d %d %d)\n", a.dims, a.w, a.h, a.d, a.c);

tests/test_gemm_nt.cpp

Lines changed: 99 additions & 0 deletions
New file:

// Copyright 2025 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#include "testutil.h"

static int test_gemm_nt(int M, int N, int K, int transA, int transB, int output_transpose, int constantA, int constantB)
{
    ncnn::ParamDict pd;
    pd.set(2, transA);
    pd.set(3, transB);
    pd.set(4, constantA);
    pd.set(5, constantB);
    pd.set(6, 1);
    pd.set(7, M);
    pd.set(8, N);
    pd.set(9, K);
    pd.set(10, -1);
    pd.set(14, output_transpose);

    std::vector<ncnn::Mat> weights;
    if (constantA) weights.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
    if (constantB) weights.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));

    std::vector<ncnn::Mat> a;
    if (!constantA) a.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
    if (!constantB) a.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));

    for (size_t i = 0; i < weights.size(); i++)
    {
        Randomize(weights[i]);
    }

    for (size_t i = 0; i < a.size(); i++)
    {
        Randomize(a[i]);
    }

    float epsilon = 0.001;

    int ret = test_layer("Gemm", pd, weights, a, 1, epsilon, TEST_LAYER_ENABLE_THREADING);
    if (ret != 0)
    {
        fprintf(stderr, "test_gemm_nt failed M=%d N=%d K=%d transA=%d transB=%d output_transpose=%d constantA=%d constantB=%d\n", M, N, K, transA, transB, output_transpose, constantA, constantB);
    }

    return ret;
}

static int test_gemm_0(int M, int N, int K)
{
    return 0
           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 0)
           || test_gemm_nt(M, N, K, 0, 1, 0, 0, 0)
           || test_gemm_nt(M, N, K, 1, 0, 1, 0, 0)
           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 0)

           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 0)
           || test_gemm_nt(M, N, K, 0, 1, 1, 1, 0)
           || test_gemm_nt(M, N, K, 1, 0, 0, 1, 0)
           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 0)

           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 1)
           || test_gemm_nt(M, N, K, 0, 1, 1, 0, 1)
           || test_gemm_nt(M, N, K, 1, 0, 0, 0, 1)
           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 1)

           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 1)
           || test_gemm_nt(M, N, K, 0, 1, 0, 1, 1)
           || test_gemm_nt(M, N, K, 1, 0, 1, 1, 1)
           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 1);
}

int main()
{
    SRAND(7767517);

    int mnk[][3] = {
        {1, 20, 40},
        {20, 2, 39},
        {3, 30, 13},
        {33, 1, 19}
    };

    int mnk_count = sizeof(mnk) / sizeof(int) / 3;

    for (int i = 0; i < mnk_count; i++)
    {
        int M = mnk[i][0];
        int N = mnk[i][1];
        int K = mnk[i][2];

        int ret = test_gemm_0(M, N, K);

        if (ret != 0)
            return ret;
    }

    return 0;
}
