
Commit e2a5cda

KG-381 Add input and total tokens count in internal Mock Test framework (#1145)
## Motivation and Context

[KG-381](https://youtrack.jetbrains.com/issue/KG-381) Add input and total tokens count in internal Mock Test framework.

1. The new `updateTokenCounts` function calculates input, output, and total token counts.
2. Updated response metadata creation to include token counts where applicable.
3. Removed the unused `inputTokensCount` variable.
4. Added tests for the token counter in the MockLLM responses.

## Breaking Changes

None.

---

#### Type of the changes

- [x] New feature (non-breaking change which adds functionality)
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [x] Tests improvement
- [x] Refactoring

#### Checklist

- [x] The pull request has a description of the proposed change
- [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request
- [x] The pull request uses **`develop`** as the base branch
- [x] Tests for the changes have been added
- [x] All new and existing tests passed

##### Additional steps for pull requests adding a new feature

- [x] An issue describing the proposed change exists
- [x] The pull request includes a link to the issue
- [x] The change was discussed and approved in the issue
- [x] Docs have been added / updated
1 parent f88af15 commit e2a5cda
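
To illustrate the accounting rule this commit introduces, here is a minimal sketch. `Tokenizer` and `ResponseMetaInfo.create` (with its `inputTokensCount`/`outputTokensCount`/`totalTokensCount` parameters) are taken from the diffs below; the whitespace tokenizer, the `countTokens(text: String): Int` signature (inferred from the calls in the diff), and the use of `kotlinx.datetime.Clock.System` are assumptions for the example:

```kotlin
import ai.koog.prompt.message.ResponseMetaInfo
import ai.koog.prompt.tokenizer.Tokenizer
import kotlinx.datetime.Clock

// Assumed trivial tokenizer for deterministic counts; any Tokenizer works.
val tokenizer = object : Tokenizer {
    override fun countTokens(text: String): Int = text.split(Regex("\\s+")).size
}

fun main() {
    val input = "What is the capital of France?" // last request message
    val output = "Paris"                         // mocked response

    val inputTokens = tokenizer.countTokens(input)   // 6 with the whitespace tokenizer
    val outputTokens = tokenizer.countTokens(output) // 1

    // The mock framework now fills all three counts, with total = input + output.
    val metaInfo = ResponseMetaInfo.create(
        Clock.System,
        inputTokensCount = inputTokens,
        outputTokensCount = outputTokens,
        totalTokensCount = inputTokens + outputTokens
    )
    println(metaInfo)
}
```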

File tree

4 files changed: +383 -35 lines changed

agents/agents-features/agents-features-opentelemetry/src/jvmTest/kotlin/ai/koog/agents/features/opentelemetry/feature/span/OpenTelemetryInferenceSpanTest.kt

Lines changed: 5 additions & 2 deletions

```diff
@@ -445,7 +445,7 @@ class OpenTelemetryInferenceSpanTest : OpenTelemetryTestBase() {
     }
 
     @Test
-    fun `test inference span contains tokes data`() = runTest {
+    fun `test inference span contains tokens data`() = runTest {
        val userInput = USER_PROMPT_PARIS
        val mockLLMResponse = MOCK_LLM_RESPONSE_PARIS
        val model = defaultModel
@@ -492,7 +492,10 @@ class OpenTelemetryInferenceSpanTest : OpenTelemetryTestBase() {
                "gen_ai.operation.name" to "chat",
                "gen_ai.request.temperature" to temperature,
                "gen_ai.response.finish_reasons" to listOf(FinishReasonType.Stop.id),
-               "gen_ai.usage.output_tokens" to tokenizer.countTokens(text = mockLLMResponse).toLong()
+               "gen_ai.usage.input_tokens" to tokenizer.countTokens(text = userInput).toLong(),
+               "gen_ai.usage.output_tokens" to tokenizer.countTokens(text = mockLLMResponse).toLong(),
+               "gen_ai.usage.total_tokens" to tokenizer.countTokens(text = userInput)
+                   .toLong() + tokenizer.countTokens(text = mockLLMResponse).toLong(),
            ),
            "events" to mapOf(
                "gen_ai.system.message" to mapOf(
```

agents/agents-test/src/commonMain/kotlin/ai/koog/agents/testing/tools/MockLLMBuilder.kt

Lines changed: 91 additions & 24 deletions

```diff
@@ -114,7 +114,7 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
     *
     * Useful in scenarios where the mock response handling involves mixed results
     * from the LLM, and there is a need to differentiate between handling the general
-    * last message vs the last assistant-specific message.
+    * last message vs. the last assistant-specific message.
     */
    public var handleLastAssistantMessage: Boolean = false
 
@@ -184,7 +184,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = toolCallId,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Will be updated at runtime with actual input
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        )
    }
@@ -208,7 +213,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Will be updated at runtime with actual input
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        )
    }
@@ -231,7 +241,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Will be updated at runtime with actual input
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        }
    }
@@ -253,7 +268,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Will be updated at runtime with actual input
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        }
    }
@@ -278,7 +298,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Will be updated at runtime with actual input
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        }
    }
@@ -306,7 +331,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Cannot determine input tokens for conditional matches without the actual input string
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        )
    }
@@ -330,7 +360,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = null, // Cannot determine input tokens for conditional matches without the actual input string
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        }
    }
@@ -366,7 +401,12 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
                id = null,
                tool = tool.name,
                content = toolContent,
-               metaInfo = ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(toolContent))
+               metaInfo = ResponseMetaInfo.create(
+                   clock,
+                   inputTokensCount = tokenizer?.countTokens(pattern),
+                   outputTokensCount = tokenizer?.countTokens(toolContent),
+                   totalTokensCount = null // Will be calculated at runtime
+               )
            )
        }
    }
@@ -792,24 +832,35 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
            texts.map { text ->
                Message.Assistant(
                    text,
-                   ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(text))
+                   ResponseMetaInfo.create(
+                       clock,
+                       inputTokensCount = null, // Will be updated at runtime with actual input
+                       outputTokensCount = tokenizer?.countTokens(text),
+                       totalTokensCount = null // Will be calculated at runtime
+                   )
                )
            }
        }
 
-       val combinedExactMatches = (processedAssistantExactMatches.keys + toolCallExactMatches.keys).associateWith { key ->
-           val assistantList = processedAssistantExactMatches[key] ?: emptyList()
-           val toolCallList = toolCallExactMatches[key] ?: emptyList()
-           assistantList + toolCallList
-       }
+       val combinedExactMatches =
+           (processedAssistantExactMatches.keys + toolCallExactMatches.keys).associateWith { key ->
+               val assistantList = processedAssistantExactMatches[key] ?: emptyList()
+               val toolCallList = toolCallExactMatches[key] ?: emptyList()
+               assistantList + toolCallList
+           }
 
        // Partial Matches
        val processedAssistantPartialMatches = assistantPartialMatches.mapValues { (_, value) ->
            val texts = value.map { text -> text.trimIndent() }
            texts.map { text ->
                Message.Assistant(
                    text,
-                   ResponseMetaInfo.create(clock, outputTokensCount = tokenizer?.countTokens(text))
+                   ResponseMetaInfo.create(
+                       clock,
+                       inputTokensCount = null, // Will be updated at runtime with actual input
+                       outputTokensCount = tokenizer?.countTokens(text),
+                       totalTokensCount = null // Will be calculated at runtime
+                   )
                )
            }
        }
@@ -827,23 +878,39 @@ public class MockLLMBuilder(private val clock: Clock, private val tokenizer: Tok
            textResponse.map { response ->
                Message.Assistant(
                    content = response,
-                   metaInfo = ResponseMetaInfo.create(clock)
+                   metaInfo = ResponseMetaInfo.create(
+                       clock,
+                       inputTokensCount = null, // Cannot determine input tokens for conditional matches without the actual input string
+                       outputTokensCount = tokenizer?.countTokens(response),
+                       totalTokensCount = null // Will be calculated at runtime
+                   )
                )
            }
        } ?: emptyMap()
 
-       val combinedConditionalMatches = (processedAssistantConditionalMatches.keys + toolCallConditionalMatches.keys).associateWith { key ->
-           buildList {
-               processedAssistantConditionalMatches[key]?.let { addAll(it) }
-               toolCallConditionalMatches[key]?.let { addAll(it) }
+       val combinedConditionalMatches =
+           (processedAssistantConditionalMatches.keys + toolCallConditionalMatches.keys).associateWith { key ->
+               buildList {
+                   processedAssistantConditionalMatches[key]?.let { addAll(it) }
+                   toolCallConditionalMatches[key]?.let { addAll(it) }
+               }
            }
-       }
 
        val responseMatcher = ResponseMatcher(
            partialMatches = combinedPartialMatches.takeIf { it.isNotEmpty() },
            exactMatches = combinedExactMatches.takeIf { it.isNotEmpty() },
            conditional = combinedConditionalMatches,
-           defaultResponse = listOf(Message.Assistant(defaultResponse, ResponseMetaInfo.create(clock)))
+           defaultResponse = listOf(
+               Message.Assistant(
+                   defaultResponse,
+                   ResponseMetaInfo.create(
+                       clock,
+                       inputTokensCount = null, // Will be updated at runtime with actual input
+                       outputTokensCount = tokenizer?.countTokens(defaultResponse),
+                       totalTokensCount = null // Will be calculated at runtime
+                   )
+               )
+           )
        )
 
        val moderationResponseMatcher = ResponseMatcher(
@@ -942,7 +1009,7 @@ public class DefaultResponseReceiver(
 * @param clock: A clock that is used for mock message timestamps
 * @param tokenizer: Tokenizer that will be used to estimate token counts in mock messages
 * @param init A lambda with receiver that configures the mock LLM executor
-* @return A configured PromptExecutor for testing
+* @return Сonfigured PromptExecutor for testing
 *
 * Example usage:
 * ```kotlin
```
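
A pattern worth noting in these hunks: at build time the input prompt is not yet known, so the builder stores placeholder `null` counts that the executor recomputes per request. A minimal sketch of the two phases, under the same assumptions as above (the helper names `buildTimeMetaOf`/`requestTimeMetaOf` are hypothetical, purely for illustration):

```kotlin
import ai.koog.prompt.message.ResponseMetaInfo
import ai.koog.prompt.tokenizer.Tokenizer
import kotlinx.datetime.Clock

// Build time (MockLLMBuilder): only the canned output is known.
fun buildTimeMetaOf(clock: Clock, tokenizer: Tokenizer?, output: String): ResponseMetaInfo =
    ResponseMetaInfo.create(
        clock,
        inputTokensCount = null, // unknown until a request arrives
        outputTokensCount = tokenizer?.countTokens(output),
        totalTokensCount = null // derived once the input is known
    )

// Request time (MockLLMExecutor.updateTokenCounts): recompute with the real input.
fun requestTimeMetaOf(clock: Clock, tokenizer: Tokenizer, input: String, output: String): ResponseMetaInfo {
    val inputTokens = tokenizer.countTokens(input)
    val outputTokens = tokenizer.countTokens(output)
    return ResponseMetaInfo.create(
        clock,
        inputTokensCount = inputTokens,
        outputTokensCount = outputTokens,
        totalTokensCount = inputTokens + outputTokens
    )
}
```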

agents/agents-test/src/commonMain/kotlin/ai/koog/agents/testing/tools/MockLLMExecutor.kt

Lines changed: 52 additions & 9 deletions

```diff
@@ -7,6 +7,7 @@ import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.executor.model.PromptExecutor
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.message.Message
+import ai.koog.prompt.message.ResponseMetaInfo
 import ai.koog.prompt.streaming.StreamFrame
 import ai.koog.prompt.streaming.toStreamFrame
 import ai.koog.prompt.tokenizer.Tokenizer
@@ -38,9 +39,9 @@ internal class ResponseMatcher<TResponse>(
 *
 * This class simulates an LLM by returning predefined responses based on the input prompt.
 * It supports different types of matching:
-* 1. Exact matching - Returns a response when the input exactly matches a pattern
+* 1. Exact matching - Returns a response when the input exactly matches pattern
 * 2. Partial matching - Returns a response when the input contains a pattern
-* 3. Conditional matching - Returns a response when the input satisfies a condition
+* 3. Conditional matching - Returns a response when the input satisfies condition
 * 4. Default response - Returns a default response when no other matches are found
 *
 * It also supports tool calls and can be configured to return specific tool results.
@@ -138,7 +139,7 @@ internal class MockLLMExecutor(
     * 1. First checking for exact matches
     * 2. Then checking for partial matches
     * 3. Then checking for conditional matches
-    * 4. Finally returning the default response if no matches are found
+    * 4. Finally, returning the default response if no matches are found
     *
     * @param prompt The prompt to handle
     * @return The appropriate response based on the configured matches
@@ -147,8 +148,6 @@ internal class MockLLMExecutor(
        logger.debug { "Handling prompt with messages:" }
        prompt.messages.forEach { logger.debug { "Message content: ${it.content.take(300)}..." } }
 
-       val inputTokensCount = tokenizer?.let { prompt.messages.map { it.content }.sumOf(it::countTokens) }
-
        val lastMessage = getLastMessage(prompt) ?: return responseMatcher.defaultResponse
 
        // Check the exact response match
@@ -170,20 +169,19 @@ internal class MockLLMExecutor(
        }
 
        // Check request conditions
-       val conditionals = getConditionalResponse(lastMessage, inputTokensCount) ?: listOf()
+       val conditionals = getConditionalResponse(lastMessage) ?: listOf()
 
        val result = (exactMatchedResponse ?: listOf()) + partiallyMatchedResponse + conditionals
        if (result.any()) {
-           return result
+           return updateTokenCounts(result, lastMessage.content)
        }
 
        // Process the default LLM response
-       return responseMatcher.defaultResponse
+       return updateTokenCounts(responseMatcher.defaultResponse, lastMessage.content)
    }
 
    private fun getConditionalResponse(
        lastMessage: Message,
-       inputTokensCount: Int?
    ): List<Message.Response>? = if (!responseMatcher.conditional.isNullOrEmpty()) {
        responseMatcher.conditional.entries.firstOrNull { it.key(lastMessage.content) }?.let { (_, response) ->
            logger.debug { "Returning response for conditional match: $response" }
@@ -193,6 +191,51 @@ internal class MockLLMExecutor(
        emptyList()
    }
 
+   /**
+    * Updates the token counts in response metadata to use the input string.
+    */
+   private fun updateTokenCounts(
+       responses: List<Message.Response>,
+       input: String,
+   ): List<Message.Response> {
+       if (tokenizer == null) return responses
+
+       val inputTokenCount = tokenizer.countTokens(input)
+
+       return responses.map { response ->
+           when (response) {
+               is Message.Assistant -> {
+                   val outputTokenCount = tokenizer.countTokens(response.content)
+                   val updatedMetaInfo = ResponseMetaInfo.create(
+                       clock = clock,
+                       inputTokensCount = inputTokenCount,
+                       outputTokensCount = outputTokenCount,
+                       totalTokensCount = inputTokenCount + outputTokenCount
+                   )
+                   Message.Assistant(response.content, updatedMetaInfo)
+               }
+
+               is Message.Tool.Call -> {
+                   val outputTokenCount = tokenizer.countTokens(response.content)
+                   val updatedMetaInfo = ResponseMetaInfo.create(
+                       clock = clock,
+                       inputTokensCount = inputTokenCount,
+                       outputTokensCount = outputTokenCount,
+                       totalTokensCount = inputTokenCount + outputTokenCount
+                   )
+                   Message.Tool.Call(
+                       id = response.id,
+                       tool = response.tool,
+                       content = response.content,
+                       metaInfo = updatedMetaInfo
+                   )
+               }
+
+               else -> response // Keep other response types unchanged
+           }
+       }
+   }
+
    /*
    Additional helper functions
    */
```
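
Taken together, a test can now read usage counts straight off mock responses. A hedged usage sketch follows; the `getMockExecutor` entry point (whose clock/tokenizer/init parameters match the KDoc in MockLLMBuilder.kt above), its package, and the `mockLLMAnswer ... onRequestContains ...` DSL come from Koog's testing documentation, not from this diff, so treat them as assumptions:

```kotlin
import ai.koog.agents.testing.tools.getMockExecutor // assumed location, same package as MockLLMBuilder
import ai.koog.prompt.tokenizer.Tokenizer

// Deterministic tokenizer so expected counts are easy to compute by hand.
val testTokenizer = object : Tokenizer {
    override fun countTokens(text: String): Int = text.split(Regex("\\s+")).size
}

val executor = getMockExecutor(tokenizer = testTokenizer) {
    mockLLMAnswer("Paris") onRequestContains "capital of France"
}

// Via updateTokenCounts, every response the mock returns now carries:
//   metaInfo.inputTokensCount  = tokens in the last request message
//   metaInfo.outputTokensCount = tokens in the canned response
//   metaInfo.totalTokensCount  = input + output
```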
