Skip to content

Commit 6b709bf

Browse files
committed
test++
1 parent 3d98511 commit 6b709bf

File tree

3 files changed

+81
-11
lines changed

3 files changed

+81
-11
lines changed

src/layer/x86/gemm_x86.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,17 +1421,12 @@ static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max
14211421
}
14221422
for (; jj + 3 < max_jj; jj += 4)
14231423
{
1424-
#if __AVX__
1424+
#if __AVX__ && !__AVX512F__
14251425
if (elempack == 8)
14261426
{
1427-
#if __AVX512F__
1428-
// assert (j + jj) % 8 == 0
1429-
const float* p0 = (const float*)B + (j + jj) * B_hstep + k * 8;
1430-
#else
14311427
const float* p0 = (const float*)B + (j + jj) / 8 * 8 * B_hstep + k * 8;
14321428

14331429
if ((j + jj) % 8 == 0)
1434-
#endif
14351430
{
14361431
for (int kk = 0; kk < max_kk; kk++)
14371432
{
@@ -1440,7 +1435,6 @@ static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max
14401435
p0 += 8;
14411436
}
14421437
}
1443-
#if !__AVX512F__
14441438
if ((j + jj) % 8 == 4)
14451439
{
14461440
for (int kk = 0; kk < max_kk; kk++)
@@ -1450,9 +1444,8 @@ static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max
14501444
p0 += 8;
14511445
}
14521446
}
1453-
#endif // !__AVX512F__
14541447
}
1455-
#endif // __AVX__
1448+
#endif // __AVX__ && !__AVX512F__
14561449
if (elempack == 4)
14571450
{
14581451
const float* p0 = (const float*)B + (j + jj) * B_hstep + k * 4;

tests/test_gemm_oom.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,71 @@ static int test_gemm_4(int M, int N, int K)
363363
}
364364
#endif // NCNN_INT8
365365

366+
// Runs one "Gemm" layer configuration through test_layer_oom with the
// TEST_LAYER_ENABLE_THREADING flag set.
//
// M, N, K            - forwarded to the layer as params 7, 8 and 9
// transA, transB     - forwarded as params 2 and 3; they also pick the Mat
//                      constructor argument order used for A and B below
// output_transpose   - forwarded as param 14
// constantA/constantB - forwarded as params 4 and 5; a constant operand is
//                      passed in the weights list, a non-constant one in the
//                      input list
//
// Returns the value produced by test_layer_oom; a non-zero value is also
// reported on stderr together with the full parameter set.
static int test_gemm_nt_oom(int M, int N, int K, int transA, int transB, int output_transpose, int constantA, int constantB)
367+
{
368+
ncnn::ParamDict pd;
369+
pd.set(2, transA);
370+
pd.set(3, transB);
371+
pd.set(4, constantA);
372+
pd.set(5, constantB);
// NOTE(review): params 6 and 10 are fixed at 1 and -1; their meaning is not
// visible in this file - confirm against the Gemm layer's parameter table.
373+
pd.set(6, 1);
374+
pd.set(7, M);
375+
pd.set(8, N);
376+
pd.set(9, K);
377+
pd.set(10, -1);
378+
pd.set(14, output_transpose);
379+
380+
// constant operands travel as layer weights ...
std::vector<ncnn::Mat> weights;
381+
if (constantA) weights.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
382+
if (constantB) weights.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
383+
384+
// ... while non-constant operands travel as runtime inputs
std::vector<ncnn::Mat> a;
385+
if (!constantA) a.push_back(transA ? ncnn::Mat(M, K) : ncnn::Mat(K, M));
386+
if (!constantB) a.push_back(transB ? ncnn::Mat(K, N) : ncnn::Mat(N, K));
387+
388+
for (size_t i = 0; i < weights.size(); i++)
389+
{
390+
Randomize(weights[i]);
391+
}
392+
393+
for (size_t i = 0; i < a.size(); i++)
394+
{
395+
Randomize(a[i]);
396+
}
397+
398+
int ret = test_layer_oom("Gemm", pd, weights, a, 1, TEST_LAYER_ENABLE_THREADING);
399+
if (ret != 0)
400+
{
401+
fprintf(stderr, "test_gemm_nt_oom failed M=%d N=%d K=%d transA=%d transB=%d output_transpose=%d constantA=%d constantB=%d\n", M, N, K, transA, transB, output_transpose, constantA, constantB);
402+
}
403+
404+
return ret;
405+
}
406+
407+
// Runs test_gemm_nt_oom for all 16 combinations of
// (transA, transB, constantA, constantB) at the given M/N/K, with
// output_transpose alternating between 0 and 1 across the cases.
//
// The calls are chained with ||, so evaluation stops at the first case that
// returns non-zero. The result is 0 when every case returns 0 and 1
// otherwise (the || chain yields a bool, not the failing case's own code).
static int test_gemm_5(int M, int N, int K)
408+
{
409+
return 0
410+
// argument order after K: transA, transB, output_transpose, constantA, constantB
|| test_gemm_nt_oom(M, N, K, 0, 0, 0, 0, 0)
411+
|| test_gemm_nt_oom(M, N, K, 0, 1, 0, 0, 0)
412+
|| test_gemm_nt_oom(M, N, K, 1, 0, 1, 0, 0)
413+
|| test_gemm_nt_oom(M, N, K, 1, 1, 1, 0, 0)
414+
415+
|| test_gemm_nt_oom(M, N, K, 0, 0, 1, 1, 0)
416+
|| test_gemm_nt_oom(M, N, K, 0, 1, 1, 1, 0)
417+
|| test_gemm_nt_oom(M, N, K, 1, 0, 0, 1, 0)
418+
|| test_gemm_nt_oom(M, N, K, 1, 1, 0, 1, 0)
419+
420+
|| test_gemm_nt_oom(M, N, K, 0, 0, 0, 0, 1)
421+
|| test_gemm_nt_oom(M, N, K, 0, 1, 1, 0, 1)
422+
|| test_gemm_nt_oom(M, N, K, 1, 0, 0, 0, 1)
423+
|| test_gemm_nt_oom(M, N, K, 1, 1, 1, 0, 1)
424+
425+
|| test_gemm_nt_oom(M, N, K, 0, 0, 1, 1, 1)
426+
|| test_gemm_nt_oom(M, N, K, 0, 1, 0, 1, 1)
427+
|| test_gemm_nt_oom(M, N, K, 1, 0, 1, 1, 1)
428+
|| test_gemm_nt_oom(M, N, K, 1, 1, 0, 1, 1);
429+
}
430+
366431
int main()
367432
{
368433
SRAND(7767517);
@@ -391,6 +456,10 @@ int main()
391456
if (ret2 != 0)
392457
return ret2;
393458
#endif
459+
460+
// Run the threaded OOM cases and propagate their own status on failure,
// the same way the ret2 check above returns ret2. Returning `ret` here would
// hand back the status of an earlier test instead of this one.
int ret3 = test_gemm_5(M, N, K);
461+
if (ret3 != 0)
462+
return ret3;
394463
}
395464

396465
return 0;

tests/testutil.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,9 +1819,13 @@ int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const
18191819
op->load_model(mb);
18201820

18211821
ncnn::Option opt = _opt;
1822-
opt.num_threads = 1;
18231822
opt.use_vulkan_compute = false;
18241823

1824+
if (flag & TEST_LAYER_ENABLE_THREADING)
1825+
opt.num_threads = ncnn::get_physical_big_cpu_count();
1826+
else
1827+
opt.num_threads = 1;
1828+
18251829
op->create_pipeline(opt);
18261830

18271831
if (!op->support_packing && _opt.use_packing_layout)
@@ -1993,9 +1997,13 @@ int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const
19931997
op->load_model(mb);
19941998

19951999
ncnn::Option opt = _opt;
1996-
opt.num_threads = 1;
19972000
opt.use_vulkan_compute = false;
19982001

2002+
if (flag & TEST_LAYER_ENABLE_THREADING)
2003+
opt.num_threads = ncnn::get_physical_big_cpu_count();
2004+
else
2005+
opt.num_threads = 1;
2006+
19992007
op->create_pipeline(opt);
20002008

20012009
if (!op->support_packing && _opt.use_packing_layout)

0 commit comments

Comments
 (0)