@@ -149,19 +149,6 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
149149 const int past_seqlen = kv_cache ? past_key.h : 0 ;
150150 const int dst_seqlen = past_seqlen + cur_seqlen;
151151
152- // Unpack mask if necessary
153- Mat attn_mask_blob_unpacked;
154- if (attn_mask && attn_mask_blob.elempack != 1 )
155- {
156- convert_packing (attn_mask_blob, attn_mask_blob_unpacked, 1 , opt);
157- if (attn_mask_blob_unpacked.empty ())
158- return -100 ;
159- }
160- else
161- {
162- attn_mask_blob_unpacked = attn_mask_blob;
163- }
164-
165152 Mat key;
166153 if (past_seqlen > 0 )
167154 {
@@ -220,7 +207,6 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
220207 return -100 ;
221208
222209 std::vector<int > retqks (num_heads);
223- std::vector<int > retqkvs (num_heads);
224210
225211 // Dynamic Scale Calculation and Beta Correction
226212 Layer* _qk_gemm = qk_gemm;
@@ -266,7 +252,7 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
266252 if (attn_mask)
267253 {
268254 // Ensure mask is 2D for Gemm auto-broadcast detection
269- Mat maskm = attn_mask_blob_unpacked ;
255+ Mat maskm = attn_mask_blob ;
270256 if (maskm.dims == 3 )
271257 {
272258 // If c > 1, pick i-th head mask. If c == 1, pick 0-th (broadcast)
@@ -305,6 +291,8 @@ int SDPA_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
305291 return retqk;
306292
307293 // 3. Attn * V
294+ std::vector<int > retqkvs (num_heads);
295+
308296 #pragma omp parallel for num_threads(opt.num_threads)
309297 for (int i = 0 ; i < num_heads; i++)
310298 {
0 commit comments