Skip to content

Commit 6b634ca

Browse files
committed
test threading
1 parent 4516eb2 commit 6b634ca

File tree

3 files changed

+120
-4
lines changed

3 files changed

+120
-4
lines changed

tests/test_gemm_nt.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Copyright 2025 Tencent
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#include "testutil.h"
5+
6+
static int test_gemm_nt(int M, int N, int K, int transA, int transB, int output_transpose, int constantA, int constantB)
7+
{
8+
ncnn::ParamDict pd;
9+
pd.set(2, transA);
10+
pd.set(3, transB);
11+
pd.set(4, constantA);
12+
pd.set(5, constantB);
13+
pd.set(6, 1);
14+
pd.set(7, M);
15+
pd.set(8, N);
16+
pd.set(9, K);
17+
pd.set(10, -1);
18+
pd.set(14, output_transpose);
19+
20+
std::vector<ncnn::Mat> weights;
21+
if (constantA) weights.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
22+
if (constantB) weights.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
23+
24+
std::vector<ncnn::Mat> a;
25+
if (!constantA) a.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
26+
if (!constantB) a.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
27+
28+
for (size_t i = 0; i < weights.size(); i++)
29+
{
30+
Randomize(weights[i]);
31+
}
32+
33+
for (size_t i = 0; i < a.size(); i++)
34+
{
35+
Randomize(a[i]);
36+
}
37+
38+
float epsilon = 0.001;
39+
40+
int ret = test_layer("Gemm", pd, weights, a, 1, epsilon, 0, TEST_LAYER_ENABLE_THREADING);
41+
if (ret != 0)
42+
{
43+
fprintf(stderr, "test_gemm_nt failed M=%d N=%d K=%d transA=%d transB=%d output_transpose=%d constantA=%d constantB=%d\n", M, N, K, transA, transB, output_transpose, constantA, constantB);
44+
}
45+
46+
return ret;
47+
}
48+
49+
static int test_gemm_0(int M, int N, int K)
50+
{
51+
return 0
52+
|| test_gemm_nt(M, N, K, 0, 0, 0, 0, 0)
53+
|| test_gemm_nt(M, N, K, 0, 1, 0, 0, 0)
54+
|| test_gemm_nt(M, N, K, 1, 0, 1, 0, 0)
55+
|| test_gemm_nt(M, N, K, 1, 1, 1, 0, 0)
56+
57+
|| test_gemm_nt(M, N, K, 0, 0, 1, 1, 0)
58+
|| test_gemm_nt(M, N, K, 0, 1, 1, 1, 0)
59+
|| test_gemm_nt(M, N, K, 1, 0, 0, 1, 0)
60+
|| test_gemm_nt(M, N, K, 1, 1, 0, 1, 0)
61+
62+
|| test_gemm_nt(M, N, K, 0, 0, 0, 0, 1)
63+
|| test_gemm_nt(M, N, K, 0, 1, 1, 0, 1)
64+
|| test_gemm_nt(M, N, K, 1, 0, 0, 0, 1)
65+
|| test_gemm_nt(M, N, K, 1, 1, 1, 0, 1)
66+
67+
|| test_gemm_nt(M, N, K, 0, 0, 1, 1, 1)
68+
|| test_gemm_nt(M, N, K, 0, 1, 0, 1, 1)
69+
|| test_gemm_nt(M, N, K, 1, 0, 1, 1, 1)
70+
|| test_gemm_nt(M, N, K, 1, 1, 0, 1, 1);
71+
}
72+
73+
int main()
74+
{
75+
SRAND(7767517);
76+
77+
int mnk[][3] = {
78+
{1, 20, 40},
79+
{20, 2, 39},
80+
{3, 30, 13},
81+
{33, 1, 19}
82+
};
83+
84+
int mnk_count = sizeof(mnk) / sizeof(int) / 3;
85+
86+
for (int i = 0; i < mnk_count; i++)
87+
{
88+
int M = mnk[i][0];
89+
int N = mnk[i][1];
90+
int K = mnk[i][2];
91+
92+
int ret = test_gemm_0(M, N, K);
93+
94+
if (ret != 0)
95+
return ret;
96+
}
97+
98+
return 0;
99+
}

tests/testutil.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -656,9 +656,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
656656
op->load_model(mb);
657657

658658
ncnn::Option opt = _opt;
659-
opt.num_threads = 1;
660659
opt.use_vulkan_compute = false;
661660

661+
if (flag & TEST_LAYER_ENABLE_THREADING)
662+
opt.num_threads = ncnn::get_physical_big_cpu_count();
663+
else
664+
opt.num_threads = 1;
665+
662666
op->create_pipeline(opt);
663667

664668
if (!op->support_packing && _opt.use_packing_layout)
@@ -814,9 +818,13 @@ int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
814818
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
815819

816820
ncnn::Option opt = _opt;
817-
opt.num_threads = 1;
818821
opt.use_vulkan_compute = true;
819822

823+
if (flag & TEST_LAYER_ENABLE_THREADING)
824+
opt.num_threads = ncnn::get_physical_big_cpu_count();
825+
else
826+
opt.num_threads = 1;
827+
820828
opt.blob_vkallocator = blob_vkallocator;
821829
opt.workspace_vkallocator = blob_vkallocator;
822830
opt.staging_vkallocator = staging_vkallocator;
@@ -1149,9 +1157,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
11491157
op->load_model(mb);
11501158

11511159
ncnn::Option opt = _opt;
1152-
opt.num_threads = 1;
11531160
opt.use_vulkan_compute = false;
11541161

1162+
if (flag & TEST_LAYER_ENABLE_THREADING)
1163+
opt.num_threads = ncnn::get_physical_big_cpu_count();
1164+
else
1165+
opt.num_threads = 1;
1166+
11551167
op->create_pipeline(opt);
11561168

11571169
if (!op->support_packing && _opt.use_packing_layout)
@@ -1277,9 +1289,13 @@ int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
12771289
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
12781290

12791291
ncnn::Option opt = _opt;
1280-
opt.num_threads = 1;
12811292
opt.use_vulkan_compute = true;
12821293

1294+
if (flag & TEST_LAYER_ENABLE_THREADING)
1295+
opt.num_threads = ncnn::get_physical_big_cpu_count();
1296+
else
1297+
opt.num_threads = 1;
1298+
12831299
opt.blob_vkallocator = blob_vkallocator;
12841300
opt.workspace_vkallocator = blob_vkallocator;
12851301
opt.staging_vkallocator = staging_vkallocator;

tests/testutil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define TEST_LAYER_DISABLE_AUTO_INPUT_CASTING (1 << 1)
1717
#define TEST_LAYER_DISABLE_GPU_TESTING (1 << 2)
1818
#define TEST_LAYER_ENABLE_FORCE_INPUT_PACK8 (1 << 3)
19+
#define TEST_LAYER_ENABLE_THREADING (1 << 4)
1920

2021
void SRAND(int seed);
2122

0 commit comments

Comments
 (0)