
Commit 2161d19

fuse non interleaved rotary embed
1 parent 1113017 commit 2161d19

4 files changed: +120, -1 lines changed

tools/pnnx/src/pass_ncnn/fuse_convert_rotaryembed.cpp

Lines changed: 58 additions & 0 deletions

@@ -9,6 +9,62 @@ namespace pnnx {
 
 namespace ncnn {
 
+class fuse_rotaryembed_pass : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+8 8
+pnnx.Input input_0 0 1 input
+pnnx.Input input_1 0 1 cos_cache
+pnnx.Input input_2 0 1 sin_cache
+torch.tensor_split op_0 1 2 input 19 20 dim=%split_dim indices=(%embed_dim_half)
+pnnx.Expression op_1 1 1 20 21 expr=neg(@0)
+torch.cat op_2 2 1 21 19 22 dim=%cat_dim
+pnnx.Expression op_3 4 1 input cos_cache 22 sin_cache out expr=add(mul(@0,@1),mul(@2,@3))
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "RotaryEmbed";
+    }
+
+    const char* name_str() const
+    {
+        return "rope";
+    }
+
+    bool match(const std::map<std::string, const Operator*>& matched_operators, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        const Operand* input = matched_operators.at("op_0")->inputs[0];
+        if (!input->shape.empty())
+        {
+            const int embed_dim = input->shape[input->shape.size() - 1];
+            const int embed_dim_half = captured_params.at("embed_dim_half").i;
+            if (embed_dim != embed_dim_half * 2)
+                return false;
+        }
+
+        const int split_dim = captured_params.at("split_dim").i;
+        if (split_dim != 3 && split_dim != -1)
+            return false;
+
+        const int cat_dim = captured_params.at("cat_dim").i;
+        if (cat_dim != 3 && cat_dim != -1)
+            return false;
+
+        return true;
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& /*captured_params*/) const
+    {
+        op->params["0"] = 0; // non-interleaved
+    }
+};
+
 class fuse_rotaryembed_pass_interleaved : public GraphRewriterPass
 {
 public:
@@ -72,9 +128,11 @@ pnnx.Output output 1 0 out
 void fuse_convert_rotaryembed(Graph& graph)
 {
     fuse_rotaryembed_pass_interleaved a;
+    fuse_rotaryembed_pass b;
     int opindex = 0;
 
     pnnx_graph_rewrite(graph, &a, opindex);
+    pnnx_graph_rewrite(graph, &b, opindex);
 }
 
 } // namespace ncnn
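For reference, the pattern graph above matches the non-interleaved ("rotate_half"-style) rotary embedding used by many HuggingFace-style attention implementations, as opposed to the interleaved layout handled by the existing fuse_rotaryembed_pass_interleaved. A minimal PyTorch sketch of the computation that gets fused into a single RotaryEmbed op (function and variable names here are illustrative, not taken from the commit):

import torch

def rope_non_interleaved(x, cos, sin):
    # split the last dimension into two halves (torch.tensor_split with indices=(embed_dim_half))
    half = x.shape[-1] // 2
    x1, x2 = torch.tensor_split(x, (half,), dim=-1)
    # negate the second half and swap the halves back together (neg + cat)
    rotated = torch.cat((-x2, x1), dim=-1)
    # combine with the cos/sin caches: add(mul(x, cos), mul(rotated, sin))
    return x * cos + rotated * sin

The match() checks mirror this shape: the split and cat must act on the last dimension, and the input's embedding dimension, when known, must be exactly twice the captured split index.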

tools/pnnx/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -401,6 +401,7 @@ pnnx_add_test(transformers_mt5_attention)
 pnnx_add_test(transformers_openai_attention)
 pnnx_add_test(transformers_pegasus_attention)
 pnnx_add_test(transformers_prophetnet_attention)
+pnnx_add_test(transformers_qwen3_attention)
 pnnx_add_test(transformers_reformer_attention)
 pnnx_add_test(transformers_roberta_attention)
 pnnx_add_test(transformers_squeezebert_attention)

tools/pnnx/tests/test_transformers_deepseek_v3_attention.py

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@ def test():
 
     # torchscript to pnnx
    import os
-    os.system("../src/pnnx test_transformers_deepseek_v3_attention.pt inputshape=[3,16,192],[3,1,16,16] fp16=0")
+    os.system("../src/pnnx test_transformers_deepseek_v3_attention.pt inputshape=[3,16,192],[3,1,16,16]")
 
     # pnnx inference
     import test_transformers_deepseek_v3_attention_pnnx
tools/pnnx/tests/test_transformers_qwen3_attention.py (new file)

Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@
+# Copyright 2025 Tencent
+# SPDX-License-Identifier: BSD-3-Clause
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging import version
+
+if version.parse(torch.__version__) < version.parse('2.1'):
+    exit(0)
+
+from transformers import Qwen3Config
+from transformers.models.qwen3.modeling_qwen3 import Qwen3Attention, Qwen3RotaryEmbedding
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+        config = Qwen3Config(hidden_size=192, num_attention_heads=16, num_key_value_heads=16, q_lora_rank=64, kv_lora_rank=128, attn_implementation='sdpa')
+        self.rotary_emb = Qwen3RotaryEmbedding(config)
+        self.attn0 = Qwen3Attention(config, layer_idx=1)
+
+    def forward(self, x, mask0):
+        batch_size = x.size(0)
+        seq_length = x.size(1)
+        position_ids = torch.arange(seq_length, dtype=torch.long).unsqueeze(0).expand(batch_size, -1)
+        position_embeddings = self.rotary_emb(x, position_ids)
+        out0 = self.attn0(x, position_embeddings=position_embeddings, attention_mask=mask0, head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, output_attentions=True)
+        return out0[0]
+
+def test():
+    net = Model()
+    net.eval()
+
+    torch.manual_seed(0)
+    x = torch.rand(3, 16, 192)
+
+    mask0 = torch.rand(3, 1, 16, 16)
+
+    a = net(x, mask0)
+
+    # export torchscript
+    mod = torch.jit.trace(net, (x, mask0))
+    mod.save("test_transformers_qwen3_attention.pt")
+
+    # torchscript to pnnx
+    import os
+    os.system("../src/pnnx test_transformers_qwen3_attention.pt inputshape=[3,16,192],[3,1,16,16]")
+
+    # pnnx inference
+    import test_transformers_qwen3_attention_pnnx
+    b = test_transformers_qwen3_attention_pnnx.test_inference()
+
+    return torch.allclose(a, b, 1e-4, 1e-4)
+
+if __name__ == "__main__":
+    if test():
+        exit(0)
+    else:
+        exit(1)
