Skip to content

Commit 2f2729d

Browse files
committed
drop unpack
1 parent c1203bc commit 2f2729d

File tree

1 file changed

+3
-15
lines changed

1 file changed

+3
-15
lines changed

src/layer/x86/sdpa_x86.cpp

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -149,19 +149,6 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
149149
const int past_seqlen = kv_cache ? past_key.h : 0;
150150
const int dst_seqlen = past_seqlen + cur_seqlen;
151151

152-
// Unpack mask if necessary
153-
Mat attn_mask_blob_unpacked;
154-
if (attn_mask && attn_mask_blob.elempack != 1)
155-
{
156-
convert_packing(attn_mask_blob, attn_mask_blob_unpacked, 1, opt);
157-
if (attn_mask_blob_unpacked.empty())
158-
return -100;
159-
}
160-
else
161-
{
162-
attn_mask_blob_unpacked = attn_mask_blob;
163-
}
164-
165152
Mat key;
166153
if (past_seqlen > 0)
167154
{
@@ -220,7 +207,6 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
220207
return -100;
221208

222209
std::vector<int> retqks(num_heads);
223-
std::vector<int> retqkvs(num_heads);
224210

225211
// Dynamic Scale Calculation and Beta Correction
226212
Layer* _qk_gemm = qk_gemm;
@@ -266,7 +252,7 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
266252
if (attn_mask)
267253
{
268254
// Ensure mask is 2D for Gemm auto-broadcast detection
269-
Mat maskm = attn_mask_blob_unpacked;
255+
Mat maskm = attn_mask_blob;
270256
if (maskm.dims == 3)
271257
{
272258
// If c > 1, pick i-th head mask. If c == 1, pick 0-th (broadcast)
@@ -305,6 +291,8 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
305291
return retqk;
306292

307293
// 3. Attn * V
294+
std::vector<int> retqkvs(num_heads);
295+
308296
#pragma omp parallel for num_threads(opt.num_threads)
309297
for (int i = 0; i < num_heads; i++)
310298
{

0 commit comments

Comments
 (0)