Commit 544230f

gemm x86 tile-N threading, drop testutil layer hook function (#6428)
* test threading
* drop testutil layer hook function
1 parent bee91ae commit 544230f

15 files changed: +485 −227 lines
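The testutil change is mechanical and identical across every touched test: per the commit title, the layer hook function is dropped from testutil, so each test_layer / test_layer_opt call site loses the positional argument that carried it (it was always passed as 0, just before the flag). A before/after sketch of the call-site change, taken from the BinaryOp test in this diff:

    // before: the 0 ahead of flag was the (unused) layer hook function argument
    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);

    // after: the hook parameter is removed from the testutil signature
    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);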

src/layer/x86/gemm_x86.cpp

Lines changed: 305 additions & 133 deletions
Large diffs are not rendered by default.
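The threading work itself lives in this file, whose diff is not rendered here. Per the commit title it adds tile-N threading: the GEMM output columns are split into fixed-width N tiles and the tiles are distributed across worker threads, so each thread writes a disjoint block of output columns and no synchronization is needed. A minimal, self-contained sketch of that partitioning idea (plain row-major C = A * B with OpenMP; the tile width, loop structure, and function name are illustrative assumptions, not the actual gemm_x86.cpp code, which packs tiles and uses SIMD kernels):

    #include <algorithm>

    // Illustrative tile-N threading: each OpenMP worker owns a contiguous
    // range of output columns [j0, j0 + jj) and computes all M rows for it.
    static void gemm_tile_n_threaded(const float* A, const float* B, float* C,
                                     int M, int N, int K, int tile_n)
    {
        const int nn_N = (N + tile_n - 1) / tile_n; // number of N tiles

        #pragma omp parallel for
        for (int ppj = 0; ppj < nn_N; ppj++)
        {
            const int j0 = ppj * tile_n;
            const int jj = std::min(tile_n, N - j0); // partial tile at the right edge

            for (int i = 0; i < M; i++)
            {
                for (int j = j0; j < j0 + jj; j++)
                {
                    float sum = 0.f;
                    for (int k = 0; k < K; k++)
                        sum += A[i * K + k] * B[k * N + j];

                    C[i * N + j] = sum;
                }
            }
        }
    }

The new tests/test_gemm_nt.cpp below runs the Gemm layer with TEST_LAYER_ENABLE_THREADING, so this threaded path is exercised against the reference implementation.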

tests/test_binaryop.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_1.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_2.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_binaryop_3.cpp

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ static int test_binaryop(const ncnn::Mat& _a, const ncnn::Mat& _b, int flag)
     ab[0] = a;
     ab[1] = b;
 
-    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, ab, 1, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b.dims=%d b=(%d %d %d %d) op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b.dims, b.w, b.h, b.d, b.c, op_type);
@@ -97,7 +97,7 @@ static int test_binaryop(const ncnn::Mat& _a, float b, int flag)
 
     std::vector<ncnn::Mat> weights(0);
 
-    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("BinaryOp", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_binaryop failed a.dims=%d a=(%d %d %d %d) b=%f op_type=%d\n", a.dims, a.w, a.h, a.d, a.c, b, op_type);

tests/test_convolution_3.cpp

Lines changed: 5 additions & 5 deletions
@@ -179,7 +179,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     }
 
     int flag = TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("Convolution", pd, weights, a, requant ? 1.0f : 0.001f, 0, flag);
+    int ret = test_layer("Convolution", pd, weights, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -200,7 +200,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_winograd23_convolution = true;
     opt.use_winograd43_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -219,7 +219,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -238,7 +238,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
@@ -257,7 +257,7 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int
     opt.use_sgemm_convolution = false;
     opt.use_winograd_convolution = false;
 
-    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag);
+    ret = test_layer_opt("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);

tests/test_convolutiondepthwise_1.cpp

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int ke
     }
 
     int flag = TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("ConvolutionDepthWise", pd, weights, a, requant ? 1.0f : 0.001f, 0, flag);
+    int ret = test_layer("ConvolutionDepthWise", pd, weights, a, requant ? 1.0f : 0.001f, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_convolutiondepthwise_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, group, requant, activation_type, activation_params[0], activation_params[1]);

tests/test_dequantize.cpp

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ static int test_dequantize(const ncnn::Mat& a, int scale_data_size, int bias_dat
     weights[1] = RandomMat(bias_data_size);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
-    int ret = test_layer("Dequantize", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Dequantize", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_dequantize failed a.dims=%d a=(%d %d %d) scale_data_size=%d bias_data_size=%d\n", a.dims, a.w, a.h, a.c, scale_data_size, bias_data_size);
@@ -36,7 +36,7 @@ static int test_dequantize_pack8(const ncnn::Mat& a, int scale_data_size, int bi
     weights[1] = RandomMat(bias_data_size);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_ENABLE_FORCE_INPUT_PACK8;
-    int ret = test_layer("Dequantize", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Dequantize", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_dequantize_pack8 failed a.dims=%d a=(%d %d %d) scale_data_size=%d bias_data_size=%d\n", a.dims, a.w, a.h, a.c, scale_data_size, bias_data_size);

tests/test_flatten.cpp

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ static int test_flatten_int8(const ncnn::Mat& a)
     std::vector<ncnn::Mat> weights(0);
 
     int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_GPU_TESTING;
-    int ret = test_layer("Flatten", pd, weights, a, 0.001, 0, flag);
+    int ret = test_layer("Flatten", pd, weights, a, 0.001, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_flatten_int8 failed a.dims=%d a=(%d %d %d %d)\n", a.dims, a.w, a.h, a.d, a.c);

tests/test_gemm_nt.cpp

Lines changed: 99 additions & 0 deletions
New file:

// Copyright 2025 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#include "testutil.h"

static int test_gemm_nt(int M, int N, int K, int transA, int transB, int output_transpose, int constantA, int constantB)
{
    ncnn::ParamDict pd;
    pd.set(2, transA);
    pd.set(3, transB);
    pd.set(4, constantA);
    pd.set(5, constantB);
    pd.set(6, 1);
    pd.set(7, M);
    pd.set(8, N);
    pd.set(9, K);
    pd.set(10, -1);
    pd.set(14, output_transpose);

    std::vector<ncnn::Mat> weights;
    if (constantA) weights.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
    if (constantB) weights.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));

    std::vector<ncnn::Mat> a;
    if (!constantA) a.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
    if (!constantB) a.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));

    for (size_t i = 0; i < weights.size(); i++)
    {
        Randomize(weights[i]);
    }

    for (size_t i = 0; i < a.size(); i++)
    {
        Randomize(a[i]);
    }

    float epsilon = 0.001;

    int ret = test_layer("Gemm", pd, weights, a, 1, epsilon, TEST_LAYER_ENABLE_THREADING);
    if (ret != 0)
    {
        fprintf(stderr, "test_gemm_nt failed M=%d N=%d K=%d transA=%d transB=%d output_transpose=%d constantA=%d constantB=%d\n", M, N, K, transA, transB, output_transpose, constantA, constantB);
    }

    return ret;
}

static int test_gemm_0(int M, int N, int K)
{
    return 0
           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 0)
           || test_gemm_nt(M, N, K, 0, 1, 0, 0, 0)
           || test_gemm_nt(M, N, K, 1, 0, 1, 0, 0)
           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 0)

           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 0)
           || test_gemm_nt(M, N, K, 0, 1, 1, 1, 0)
           || test_gemm_nt(M, N, K, 1, 0, 0, 1, 0)
           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 0)

           || test_gemm_nt(M, N, K, 0, 0, 0, 0, 1)
           || test_gemm_nt(M, N, K, 0, 1, 1, 0, 1)
           || test_gemm_nt(M, N, K, 1, 0, 0, 0, 1)
           || test_gemm_nt(M, N, K, 1, 1, 1, 0, 1)

           || test_gemm_nt(M, N, K, 0, 0, 1, 1, 1)
           || test_gemm_nt(M, N, K, 0, 1, 0, 1, 1)
           || test_gemm_nt(M, N, K, 1, 0, 1, 1, 1)
           || test_gemm_nt(M, N, K, 1, 1, 0, 1, 1);
}

int main()
{
    SRAND(7767517);

    int mnk[][3] = {
        {1, 20, 40},
        {20, 2, 39},
        {3, 30, 13},
        {33, 1, 19}
    };

    int mnk_count = sizeof(mnk) / sizeof(int) / 3;

    for (int i = 0; i < mnk_count; i++)
    {
        int M = mnk[i][0];
        int N = mnk[i][1];
        int K = mnk[i][2];

        int ret = test_gemm_0(M, N, K);

        if (ret != 0)
            return ret;
    }

    return 0;
}
