diff --git a/CHANGELOG.MD b/CHANGELOG.MD
index 381e1af..874e395 100644
--- a/CHANGELOG.MD
+++ b/CHANGELOG.MD
@@ -12,6 +12,7 @@ _released 04--2026
 
 ### Added
  - **AI Evaluation Template Support**: Uploading test result support for TestRail's AI Evaluation Template with multi-dimensional quality ratings. See README "AI Evaluation Template Support" section for complete examples.
+ - **Multi-Step AI Evaluation Workflows**: Support for combining step-level execution tracking (`testrail_result_step`) with overall quality ratings in AI Evaluation tests. See README "Multi-Step AI Evaluation Workflows" section.
  - **Global Quality Rating via `--result-fields`**: Added support for applying quality ratings to all test results using `--result-fields quality_rating:'{"category": value}'`. Test-specific quality ratings in XML/JSON properties take precedence over CLI global ratings.
 
 ## [1.14.1]
diff --git a/README.md b/README.md
index e7abcc6..aaa78ed 100644
--- a/README.md
+++ b/README.md
@@ -690,6 +690,79 @@ trcli parse_robot \
   --suite-id 100
 ```
 
+### Multi-Step AI Evaluation Workflows
+
+For complex AI systems with multiple pipeline stages (like RAG, multi-agent systems, or sequential AI workflows), you can combine **step-level execution tracking** with **overall quality assessment** in your AI Evaluation tests. The `quality_rating` result field can also be added to Test Case (Steps).
+
+#### How It Works
+
+**Step-Level Tracking:**
+- Each step has its own **status** (passed, failed, skipped, untested)
+- See exactly where in the pipeline the failure occurred
+
+**Overall Quality Rating:**
+- One **quality_rating** applies to the entire test result
+- Assess the final output quality across multiple dimensions
+
+#### JUnit XML Example
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="RAG Pipeline Tests" tests="1" failures="1" time="10.5">
+  <testsuite name="Document QA" tests="1" failures="1" time="10.5">
+
+    <testcase classname="ai.rag.DocumentQA" name="C1000_test_rag_pipeline" time="10.5">
+      <properties>
+        <property name="test_id" value="C1000"/>
+
+        <!-- Step-Level Execution Tracking -->
+        <property name="testrail_result_step" value="passed:Step 1 Query Understanding"/>
+        <property name="testrail_result_step" value="passed:Step 2 Document Retrieval"/>
+        <property name="testrail_result_step" value="failed:Step 3 Answer Generation"/>
+        <property name="testrail_result_step" value="untested:Step 4 Response Validation"/>
+
+        <!-- Overall Quality Rating -->
+        <property name="quality_rating" value='{"factual_accuracy": 2, "coherence": 3, "completeness": 1}'/>
+
+        <!-- AI Context Fields (not applicable to Test Case (Steps)) -->
+        <property name="testrail_result_field" value="custom_ai_input:What programming language is used for machine learning?"/>
+        <property name="testrail_result_field" value="custom_ai_output:JavaScript is the primary language for machine learning."/>
+        <property name="testrail_result_field" value="custom_ai_traces:https://logs.example.com/trace/rag-001"/>
+        <property name="testrail_result_field" value="custom_ai_latency:10.5 seconds"/>
+      </properties>
+      <failure message="Answer generation produced factually incorrect response"/>
+    </testcase>
+
+  </testsuite>
+</testsuites>
+```
+
+**Upload Command:**
+```bash
+trcli parse_junit \
+  -f rag_pipeline_results.xml \
+  --project-id 1 \
+  --suite-id 100
+```
+
+#### Important Notes
+
+1. **Quality Rating Scope**: The `quality_rating` applies to the **entire test result**, not individual steps. It represents the overall quality of the AI system's final output.
+
+2. **Step Status Format**: Use `status:description` format for step-level tracking:
+   - `passed:Step 1 Query Understanding`
+   - `failed:Step 3 Answer Generation`
+   - `skipped:Optional Enhancement`
+   - `untested:Step 4 Response Validation`
+
+3. **Available Step Statuses**:
+   - `passed` (status_id: 1) - Step completed successfully
+   - `untested` (status_id: 3) - Step not executed
+   - `skipped` (status_id: 4, labelled "Retest" in TestRail's default status list) - Step intentionally skipped
+   - `failed` (status_id: 5) - Step failed
+
+4. **Test Status Aggregation**: The overall test status follows **fail-fast** logic - if any step fails, the entire test fails.
+
 ## Behavior-Driven Development (BDD) Support
 
 The TestRail CLI provides comprehensive support for Behavior-Driven Development workflows using Gherkin syntax. The BDD features enable you to manage test cases written in Gherkin format, execute BDD tests with various frameworks (Cucumber, Behave, pytest-bdd, etc.), and seamlessly upload results to TestRail.
diff --git a/tests/test_data/XML/sample_ai_eval_multistep_workflow.xml b/tests/test_data/XML/sample_ai_eval_multistep_workflow.xml
new file mode 100644
index 0000000..6f8220b
--- /dev/null
+++ b/tests/test_data/XML/sample_ai_eval_multistep_workflow.xml
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="RAG Pipeline - AI Evaluation" tests="3" failures="2" errors="0" time="35.2">
+
+  <!-- Suite 1: Document QA Tests -->
+  <testsuite name="Document QA RAG Pipeline" tests="3" failures="2" errors="0" time="35.2">
+
+    <!-- Test 1: Successful RAG Pipeline (All Steps Pass) -->
+    <testcase classname="ai.rag.DocumentQA" name="C1000_test_rag_pipeline_success" time="12.5">
+      <properties>
+        <property name="test_id" value="C1000"/>
+
+        <!-- Step-Level Execution Tracking -->
+        <property name="testrail_result_step" value="passed:Step 1 Query Understanding"/>
+        <property name="testrail_result_step" value="passed:Step 2 Document Retrieval"/>
+        <property name="testrail_result_step" value="passed:Step 3 Answer Generation"/>
+        <property name="testrail_result_step" value="passed:Step 4 Response Validation"/>
+
+        <!-- Overall Quality Rating (High Quality) -->
+        <property name="quality_rating" value='{"factual_accuracy": 5, "coherence": 5, "completeness": 4, "relevance": 5}'/>
+
+        <!-- AI Context Fields -->
+        <property name="testrail_result_field" value="custom_ai_input:What is the capital of France?"/>
+        <property name="testrail_result_field" value="custom_ai_output:The capital of France is Paris. Paris is the largest city in France and has been the capital since 987 AD."/>
+        <property name="testrail_result_field" value="custom_ai_traces:https://logs.example.com/trace/rag-success-001"/>
+        <property name="testrail_result_field" value="custom_ai_latency:12.5 seconds"/>
+      </properties>
+    </testcase>
+
+    <!-- Test 2: Failed Answer Generation (Step 3 Fails) -->
+    <testcase classname="ai.rag.DocumentQA" name="C1001_test_rag_pipeline_factual_error" time="10.5">
+      <properties>
+        <property name="test_id" value="C1001"/>
+
+        <!-- Step-Level Execution Tracking -->
+        <property name="testrail_result_step" value="passed:Step 1 Query Understanding"/>
+        <property name="testrail_result_step" value="passed:Step 2 Document Retrieval"/>
+        <property name="testrail_result_step" value="failed:Step 3 Answer Generation"/>
+        <property name="testrail_result_step" value="untested:Step 4 Response Validation"/>
+
+        <!-- Overall Quality Rating (Low Due to Factual Error) -->
+        <property name="quality_rating" value='{"factual_accuracy": 1, "coherence": 3, "completeness": 2, "relevance": 2}'/>
+
+        <!-- AI Context Fields -->
+        <property name="testrail_result_field" value="custom_ai_input:What programming language is primarily used for machine learning?"/>
+        <property name="testrail_result_field" value="custom_ai_output:JavaScript is the primary language for machine learning, widely used in neural networks and deep learning."/>
+        <property name="testrail_result_field" value="custom_ai_traces:https://logs.example.com/trace/rag-failure-001"/>
+        <property name="testrail_result_field" value="custom_ai_latency:10.5 seconds"/>
+      </properties>
+      <failure message="Answer generation produced factually incorrect response">
+        Expected: Python is the primary language for machine learning
+        Actual: JavaScript is the primary language for machine learning
+
+        Issue: Model hallucinated incorrect information despite correct document retrieval
+        Impact: Users receive misleading information that could affect decision-making
+      </failure>
+    </testcase>
+
+    <!-- Test 3: Document Retrieval Failure (Step 2 Fails) -->
+    <testcase classname="ai.rag.DocumentQA" name="C1002_test_rag_pipeline_retrieval_failure" time="12.2">
+      <properties>
+        <property name="test_id" value="C1002"/>
+
+        <!-- Step-Level Execution Tracking -->
+        <property name="testrail_result_step" value="passed:Step 1 Query Understanding"/>
+        <property name="testrail_result_step" value="failed:Step 2 Document Retrieval"/>
+        <property name="testrail_result_step" value="untested:Step 3 Answer Generation"/>
+        <property name="testrail_result_step" value="untested:Step 4 Response Validation"/>
+
+        <!-- Overall Quality Rating (Low Due to No Relevant Documents) -->
+        <property name="quality_rating" value='{"factual_accuracy": 0, "coherence": 1, "completeness": 0, "relevance": 1}'/>
+
+        <!-- AI Context Fields -->
+        <property name="testrail_result_field" value="custom_ai_input:Explain the Heisenberg uncertainty principle in quantum mechanics"/>
+        <property name="testrail_result_field" value="custom_ai_output:I don't have enough information to answer your question about the Heisenberg uncertainty principle."/>
+        <property name="testrail_result_field" value="custom_ai_traces:https://logs.example.com/trace/rag-retrieval-failure-001"/>
+        <property name="testrail_result_field" value="custom_ai_latency:12.2 seconds"/>
+      </properties>
+      <failure message="Document retrieval failed to find relevant sources">
+        Expected: Retrieved at least 3 relevant documents about quantum mechanics
+        Actual: Retrieved 0 relevant documents (only found documents about classical physics)
+
+        Issue: Vector search embeddings failed to capture semantic meaning of quantum mechanics query
+        Impact: System cannot provide accurate answers for domain-specific questions
+        Recommendation: Retrain embedding model with physics-domain knowledge or use specialized vector database
+      </failure>
+    </testcase>
+
+  </testsuite>
+
+</testsuites>
diff --git a/tests/test_junit_quality_rating.py b/tests/test_junit_quality_rating.py
index 7555e78..116694d 100644
--- a/tests/test_junit_quality_rating.py
+++ b/tests/test_junit_quality_rating.py
@@ -259,3 +259,253 @@ def test_backward_compatibility_no_quality_rating(self, env, tmp_path):
         assert "case_id" in result_dict
         assert "status_id" in result_dict
         assert "custom_field" in result_dict
+
+    # ========== Step-Level Results with Quality Rating ==========
+
+    def test_step_level_results_with_quality_rating(self, env, tmp_path):
+        """Parse a failing testcase with four step results plus a quality rating; check steps, rating and overall status"""
+        xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="AI Tests" tests="1" failures="1" errors="0" time="10.0">
+  <testsuite name="Multi-Step AI Workflow" tests="1" failures="1" errors="0" time="10.0">
+    <testcase classname="ai.RAGPipeline" name="C500_test_rag_pipeline" time="10.0">
+      <properties>
+        <property name="test_id" value="C500"/>
+        <property name="testrail_result_step" value="passed:Step 1 Query Understanding"/>
+        <property name="testrail_result_step" value="passed:Step 2 Document Retrieval"/>
+        <property name="testrail_result_step" value="failed:Step 3 Answer Generation"/>
+        <property name="testrail_result_step" value="untested:Step 4 Response Validation"/>
+        <property name="quality_rating" value='{"factual_accuracy": 2, "coherence": 3, "completeness": 1}'/>
+        <property name="testrail_result_field" value="custom_ai_input:What is Python?"/>
+        <property name="testrail_result_field" value="custom_ai_output:Python is a snake..."/>
+      </properties>
+      <failure message="Answer generation produced factually incorrect response"/>
+    </testcase>
+  </testsuite>
+</testsuites>"""
+
+        xml_file = tmp_path / "test_step_level_quality.xml"
+        xml_file.write_text(xml_content)
+
+        env.file = xml_file
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        test_case = suites[0].testsections[0].testcases[0]
+        result = test_case.result
+
+        # Verify step-level results: four steps in document order, content split from the status prefix
+        assert len(result.custom_step_results) == 4
+        assert result.custom_step_results[0].content == "Step 1 Query Understanding"
+        assert result.custom_step_results[0].status_id == 1  # Passed
+        assert result.custom_step_results[1].content == "Step 2 Document Retrieval"
+        assert result.custom_step_results[1].status_id == 1  # Passed
+        assert result.custom_step_results[2].content == "Step 3 Answer Generation"
+        assert result.custom_step_results[2].status_id == 5  # Failed
+        assert result.custom_step_results[3].content == "Step 4 Response Validation"
+        assert result.custom_step_results[3].status_id == 3  # Untested
+
+        # Verify overall quality rating was parsed from the JSON property value into a dict
+        assert result.quality_rating == {"factual_accuracy": 2, "coherence": 3, "completeness": 1}
+
+        # Verify overall test status is failed (status_id 5; the testcase carries a <failure> element)
+        assert result.status_id == 5
+
+    def test_step_level_serialization_with_quality_rating(self, env, tmp_path):
+        """Test that to_dict() emits both custom_step_results and quality_rating for an all-passed testcase"""
+        xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="AI Tests" tests="1" failures="0" errors="0" time="5.0">
+  <testsuite name="Success Flow" tests="1" failures="0" errors="0" time="5.0">
+    <testcase classname="ai.ChatBot" name="C501_test_chatbot_steps" time="5.0">
+      <properties>
+        <property name="test_id" value="C501"/>
+        <property name="testrail_result_step" value="passed:Intent Detection"/>
+        <property name="testrail_result_step" value="passed:Response Generation"/>
+        <property name="testrail_result_step" value="passed:Quality Check"/>
+        <property name="quality_rating" value='{"accuracy": 5, "relevance": 5, "tone": 4}'/>
+      </properties>
+    </testcase>
+  </testsuite>
+</testsuites>"""
+
+        xml_file = tmp_path / "test_step_serialization.xml"
+        xml_file.write_text(xml_content)
+
+        env.file = xml_file
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        test_case = suites[0].testsections[0].testcases[0]
+        result_dict = test_case.result.to_dict()
+
+        # Verify custom_step_results serialization: a list of three entries exposing content and status_id keys
+        assert "custom_step_results" in result_dict
+        assert len(result_dict["custom_step_results"]) == 3
+        assert result_dict["custom_step_results"][0]["content"] == "Intent Detection"
+        assert result_dict["custom_step_results"][0]["status_id"] == 1
+        assert result_dict["custom_step_results"][1]["content"] == "Response Generation"
+        assert result_dict["custom_step_results"][1]["status_id"] == 1
+        assert result_dict["custom_step_results"][2]["content"] == "Quality Check"
+        assert result_dict["custom_step_results"][2]["status_id"] == 1
+
+        # Verify quality_rating is a top-level key of the serialized result
+        assert "quality_rating" in result_dict
+        assert result_dict["quality_rating"] == {"accuracy": 5, "relevance": 5, "tone": 4}
+
+    def test_step_level_mixed_statuses(self, env, tmp_path):
+        """Test step-level results mixing passed and skipped steps on a testcase without a failure"""
+        xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="AI Tests" tests="1" failures="0" errors="0" time="3.0">
+  <testsuite name="Partial Success" tests="1" failures="0" errors="0" time="3.0">
+    <testcase classname="ai.Pipeline" name="C502_test_mixed_steps" time="3.0">
+      <properties>
+        <property name="test_id" value="C502"/>
+        <property name="testrail_result_step" value="passed:Pre-processing"/>
+        <property name="testrail_result_step" value="skipped:Optional Enhancement"/>
+        <property name="testrail_result_step" value="passed:Final Output"/>
+        <property name="quality_rating" value='{"quality": 4}'/>
+      </properties>
+    </testcase>
+  </testsuite>
+</testsuites>"""
+
+        xml_file = tmp_path / "test_mixed_steps.xml"
+        xml_file.write_text(xml_content)
+
+        env.file = xml_file
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        test_case = suites[0].testsections[0].testcases[0]
+        result = test_case.result
+
+        # Verify every step status, in document order
+        assert len(result.custom_step_results) == 3
+        assert result.custom_step_results[0].status_id == 1  # Passed
+        assert result.custom_step_results[1].status_id == 4  # Skipped
+        assert result.custom_step_results[2].status_id == 1  # Passed
+
+        # Overall test should pass (no failures; the skipped step leaves the expected status at 1)
+        assert result.status_id == 1
+
+        # Quality rating should be preserved
+        assert result.quality_rating == {"quality": 4}
+
+    def test_step_level_without_quality_rating(self, env, tmp_path):
+        """Test that step-level results still serialize when no quality_rating property is present (backward compatibility)"""
+        xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="Tests" tests="1" failures="0" errors="0" time="2.0">
+  <testsuite name="Basic Steps" tests="1" failures="0" errors="0" time="2.0">
+    <testcase classname="test.Steps" name="C503_test_steps_only" time="2.0">
+      <properties>
+        <property name="test_id" value="C503"/>
+        <property name="testrail_result_step" value="passed:Step 1"/>
+        <property name="testrail_result_step" value="passed:Step 2"/>
+      </properties>
+    </testcase>
+  </testsuite>
+</testsuites>"""
+
+        xml_file = tmp_path / "test_steps_no_rating.xml"
+        xml_file.write_text(xml_content)
+
+        env.file = xml_file
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        test_case = suites[0].testsections[0].testcases[0]
+        result_dict = test_case.result.to_dict()
+
+        # Should have both steps in the serialized result
+        assert "custom_step_results" in result_dict
+        assert len(result_dict["custom_step_results"]) == 2
+
+        # Should NOT have a quality_rating key at all
+        assert "quality_rating" not in result_dict
+
+    def test_quality_rating_without_steps(self, env, tmp_path):
+        """Test that a quality_rating property alone is serialized when the testcase has no testrail_result_step properties"""
+        xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<testsuites name="Tests" tests="1" failures="0" errors="0" time="1.0">
+  <testsuite name="No Steps" tests="1" failures="0" errors="0" time="1.0">
+    <testcase classname="test.Simple" name="C504_test_quality_only" time="1.0">
+      <properties>
+        <property name="test_id" value="C504"/>
+        <property name="quality_rating" value='{"accuracy": 5}'/>
+      </properties>
+    </testcase>
+  </testsuite>
+</testsuites>"""
+
+        xml_file = tmp_path / "test_rating_no_steps.xml"
+        xml_file.write_text(xml_content)
+
+        env.file = xml_file
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        test_case = suites[0].testsections[0].testcases[0]
+        result_dict = test_case.result.to_dict()
+
+        # Should have quality_rating
+        assert "quality_rating" in result_dict
+        assert result_dict["quality_rating"] == {"accuracy": 5}
+
+        # custom_step_results must be either absent or an empty list (both are accepted here)
+        assert "custom_step_results" not in result_dict or result_dict["custom_step_results"] == []
+
+    def test_parse_sample_multistep_workflow(self, env):
+        """Test parsing the sample multi-step AI evaluation workflow file (one suite, one section, three testcases)"""
+        env.file = Path(__file__).parent / "test_data/XML/sample_ai_eval_multistep_workflow.xml"
+        parser = JunitParser(env)
+        suites = parser.parse_file()
+
+        assert len(suites) == 1
+        suite = suites[0]
+        assert len(suite.testsections) == 1
+        section = suite.testsections[0]
+        assert len(section.testcases) == 3
+
+        # Test 1 (C1000): all four steps pass
+        test1 = section.testcases[0]
+        assert test1.result.case_id == 1000
+        assert test1.result.status_id == 1  # Passed
+        assert len(test1.result.custom_step_results) == 4
+        assert all(step.status_id == 1 for step in test1.result.custom_step_results)  # All passed
+        assert test1.result.quality_rating == {
+            "factual_accuracy": 5,
+            "coherence": 5,
+            "completeness": 4,
+            "relevance": 5,
+        }
+
+        # Test 2 (C1001): step 3 fails, step 4 left untested
+        test2 = section.testcases[1]
+        assert test2.result.case_id == 1001
+        assert test2.result.status_id == 5  # Failed
+        assert len(test2.result.custom_step_results) == 4
+        assert test2.result.custom_step_results[0].status_id == 1  # Step 1 passed
+        assert test2.result.custom_step_results[1].status_id == 1  # Step 2 passed
+        assert test2.result.custom_step_results[2].status_id == 5  # Step 3 failed
+        assert test2.result.custom_step_results[3].status_id == 3  # Step 4 untested
+        assert test2.result.quality_rating == {
+            "factual_accuracy": 1,
+            "coherence": 3,
+            "completeness": 2,
+            "relevance": 2,
+        }
+
+        # Test 3 (C1002): step 2 fails, steps 3 and 4 left untested
+        test3 = section.testcases[2]
+        assert test3.result.case_id == 1002
+        assert test3.result.status_id == 5  # Failed
+        assert len(test3.result.custom_step_results) == 4
+        assert test3.result.custom_step_results[0].status_id == 1  # Step 1 passed
+        assert test3.result.custom_step_results[1].status_id == 5  # Step 2 failed
+        assert test3.result.custom_step_results[2].status_id == 3  # Step 3 untested
+        assert test3.result.custom_step_results[3].status_id == 3  # Step 4 untested
+        assert test3.result.quality_rating == {
+            "factual_accuracy": 0,
+            "coherence": 1,
+            "completeness": 0,
+            "relevance": 1,
+        }