test threading

nihui · nihui · commit 6b634caa4fc8 · 2025-11-28T14:25:32.000+08:00
diff --git a/tests/test_gemm_nt.cpp b/tests/test_gemm_nt.cpp
@@ -0,0 +1,99 @@
+// Copyright 2025 Tencent
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include "testutil.h"
+// Run one Gemm configuration through test_layer() with TEST_LAYER_ENABLE_THREADING set; returns test_layer()'s result (0 = pass) and logs the parameters to stderr on failure.
+static int test_gemm_nt(int M, int N, int K, int transA, int transB, int output_transpose, int constantA, int constantB)
+{
+    ncnn::ParamDict pd;
+    pd.set(2, transA);
+    pd.set(3, transB);
+    pd.set(4, constantA);
+    pd.set(5, constantB);
+    pd.set(6, 1); // presumably constantC=1 in ncnn's Gemm param table (no C blob is created below) - TODO confirm
+    pd.set(7, M);
+    pd.set(8, N);
+    pd.set(9, K);
+    pd.set(10, -1); // presumably constant_broadcast_type_C=-1, i.e. no C term - TODO confirm
+    pd.set(14, output_transpose);
+    // a constant operand is supplied as a layer weight, a non-constant one as a runtime input
+    std::vector<ncnn::Mat> weights;
+    if (constantA) weights.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
+    if (constantB) weights.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
+
+    std::vector<ncnn::Mat> a;
+    if (!constantA) a.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
+    if (!constantB) a.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
+
+    for (size_t i = 0; i < weights.size(); i++)
+    {
+        Randomize(weights[i]);
+    }
+
+    for (size_t i = 0; i < a.size(); i++)
+    {
+        Randomize(a[i]);
+    }
+
+    float epsilon = 0.001;
+    // NOTE(review): the flag is presumably forwarded to test_layer_cpu/test_layer_gpu, which then use ncnn::get_physical_big_cpu_count() threads instead of 1
+    int ret = test_layer("Gemm", pd, weights, a, 1, epsilon, 0, TEST_LAYER_ENABLE_THREADING);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_gemm_nt failed M=%d N=%d K=%d transA=%d transB=%d output_transpose=%d constantA=%d constantB=%d\n", M, N, K, transA, transB, output_transpose, constantA, constantB);
+    }
+
+    return ret;
+}
+// Run test_gemm_nt() for 16 transA/transB/output_transpose/constantA/constantB combinations at one M/N/K; the || chain stops at the first failure and yields 0 (all passed) or 1.
+static int test_gemm_0(int M, int N, int K)
+{
+    return 0
+           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 0)
+           || test_gemm_nt(M, N, K, 0, 1, 0, 0, 0)
+           || test_gemm_nt(M, N, K, 1, 0, 1, 0, 0)
+           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 0)
+           // constantA=1, constantB=0: A is a weight, B is a runtime input
+           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 0)
+           || test_gemm_nt(M, N, K, 0, 1, 1, 1, 0)
+           || test_gemm_nt(M, N, K, 1, 0, 0, 1, 0)
+           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 0)
+           // constantA=0, constantB=1: A is a runtime input, B is a weight
+           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 1)
+           || test_gemm_nt(M, N, K, 0, 1, 1, 0, 1)
+           || test_gemm_nt(M, N, K, 1, 0, 0, 0, 1)
+           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 1)
+           // constantA=1, constantB=1: both A and B are weights
+           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 1)
+           || test_gemm_nt(M, N, K, 0, 1, 0, 1, 1)
+           || test_gemm_nt(M, N, K, 1, 0, 1, 1, 1)
+           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 1);
+}
+// Entry point: call SRAND with a fixed seed, then run test_gemm_0() for each shape in mnk; returns the first non-zero result, or 0 if every shape passes.
+int main()
+{
+    SRAND(7767517);
+    // each row is {M, N, K}
+    int mnk[][3] = {
+        {1, 20, 40},
+        {20, 2, 39},
+        {3, 30, 13},
+        {33, 1, 19}
+    };
+
+    int mnk_count = sizeof(mnk) / sizeof(int) / 3; // total ints / 3 per row = number of rows
+
+    for (int i = 0; i < mnk_count; i++)
+    {
+        int M = mnk[i][0];
+        int N = mnk[i][1];
+        int K = mnk[i][2];
+
+        int ret = test_gemm_0(M, N, K);
+
+        if (ret != 0)
+            return ret;
+    }
+
+    return 0;
+}
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
@@ -656,9 +656,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
     op->load_model(mb);
 
     ncnn::Option opt = _opt;
-    opt.num_threads = 1;
     opt.use_vulkan_compute = false;
 
+    if (flag & TEST_LAYER_ENABLE_THREADING)
+        opt.num_threads = ncnn::get_physical_big_cpu_count();
+    else
+        opt.num_threads = 1;
+
     op->create_pipeline(opt);
 
     if (!op->support_packing && _opt.use_packing_layout)
@@ -814,9 +818,13 @@ int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
     ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
 
     ncnn::Option opt = _opt;
-    opt.num_threads = 1;
     opt.use_vulkan_compute = true;
 
+    if (flag & TEST_LAYER_ENABLE_THREADING)
+        opt.num_threads = ncnn::get_physical_big_cpu_count();
+    else
+        opt.num_threads = 1;
+
     opt.blob_vkallocator = blob_vkallocator;
     opt.workspace_vkallocator = blob_vkallocator;
     opt.staging_vkallocator = staging_vkallocator;
@@ -1149,9 +1157,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
     op->load_model(mb);
 
     ncnn::Option opt = _opt;
-    opt.num_threads = 1;
     opt.use_vulkan_compute = false;
 
+    if (flag & TEST_LAYER_ENABLE_THREADING)
+        opt.num_threads = ncnn::get_physical_big_cpu_count();
+    else
+        opt.num_threads = 1;
+
     op->create_pipeline(opt);
 
     if (!op->support_packing && _opt.use_packing_layout)
@@ -1277,9 +1289,13 @@ int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<n
     ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
 
     ncnn::Option opt = _opt;
-    opt.num_threads = 1;
     opt.use_vulkan_compute = true;
 
+    if (flag & TEST_LAYER_ENABLE_THREADING)
+        opt.num_threads = ncnn::get_physical_big_cpu_count();
+    else
+        opt.num_threads = 1;
+
     opt.blob_vkallocator = blob_vkallocator;
     opt.workspace_vkallocator = blob_vkallocator;
     opt.staging_vkallocator = staging_vkallocator;
diff --git a/tests/testutil.h b/tests/testutil.h
@@ -16,6 +16,7 @@
 #define TEST_LAYER_DISABLE_AUTO_INPUT_CASTING (1 << 1)
 #define TEST_LAYER_DISABLE_GPU_TESTING        (1 << 2)
 #define TEST_LAYER_ENABLE_FORCE_INPUT_PACK8   (1 << 3)
+#define TEST_LAYER_ENABLE_THREADING           (1 << 4) // test_layer_cpu/test_layer_gpu use ncnn::get_physical_big_cpu_count() threads instead of 1
 
 void SRAND(int seed);