Skip to content

Commit 7a29d49

Browse files
add tools (#14)
1 parent e1ea19b commit 7a29d49

4 files changed

Lines changed: 130 additions & 1 deletion

File tree

internal/provider/openai/cost.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,11 +219,23 @@ var OpenAiPerThousandCallsToolCost = map[string]float64{
219219
"web_search": 10.0,
220220
"web_search_preview": 25.0,
221221
"web_search_preview_reasoning": 10.0,
222+
"file_search": 2.5,
223+
}
224+
225+
// OpenAiCodeInterpreterContainerCost maps a code-interpreter container
// memory limit (e.g. "1g") to the per-container creation cost in USD.
var OpenAiCodeInterpreterContainerCost = map[string]float64{
	"1g":  0.03,
	"4g":  0.12,
	"16g": 0.48,
	"64g": 1.92,
}
223231

224232
// AllowedTools lists the Responses API tool types the proxy accepts in
// incoming requests.
var AllowedTools = []string{
	"web_search",
	"web_search_preview",
	"code_interpreter",
	"file_search",
}
228240

229241
type tokenCounter interface {
@@ -571,6 +583,9 @@ func (ce *CostEstimator) EstimateResponseApiToolCallsCost(tools []responsesOpena
571583
totalCost := 0.0
572584
for _, tool := range tools {
573585
toolType := tool.Type
586+
if toolType == "code_interpreter" {
587+
continue
588+
}
574589
cost, ok := OpenAiPerThousandCallsToolCost[extendedToolType(toolType, model)]
575590
if !ok {
576591
return 0, fmt.Errorf("tool type %s is not present in the tool cost map provided", toolType)
@@ -580,6 +595,26 @@ func (ce *CostEstimator) EstimateResponseApiToolCallsCost(tools []responsesOpena
580595
return totalCost / 1000, nil
581596
}
582597

598+
func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *ResponseRequest) (float64, error) {
599+
if req == nil {
600+
return 0, nil
601+
}
602+
totalCost := 0.0
603+
for _, tool := range req.Tools {
604+
c := tool.GetContainerAsResponseRequestToolContainer()
605+
if c == nil {
606+
continue
607+
}
608+
limit := c.GetMemoryLimit()
609+
cost, ok := OpenAiCodeInterpreterContainerCost[limit]
610+
if !ok {
611+
return 0, fmt.Errorf("container with memory limit %s is not present in the code interpreter container cost map", limit)
612+
}
613+
totalCost += cost
614+
}
615+
return totalCost, nil
616+
}
617+
583618
var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"}
584619

585620
func extendedToolType(toolType, model string) string {

internal/provider/openai/types.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,45 @@ type ResponseRequest struct {
3030
//User *string `json:"user,omitzero"` //Deprecated
3131
}
3232

33-
type ResponseRequestToolUnion struct {
33+
// ResponseRequestToolContainer is the decoded object form of a tool's
// "container" field on a Responses API request.
type ResponseRequestToolContainer struct {
	Type string `json:"type"`
	// MemoryLimit mirrors the request's "memory_limit" field; nil when the
	// request did not specify one (GetMemoryLimit then falls back to "1g").
	MemoryLimit *string `json:"memory_limit,omitzero"`
}
38+
39+
func (c *ResponseRequestToolContainer) GetMemoryLimit() string {
40+
if c.MemoryLimit != nil {
41+
return *c.MemoryLimit
42+
}
43+
return "1g"
44+
}
45+
46+
// ResponseRequestToolUnion is a loosely-typed tool entry from a Responses
// API request.
type ResponseRequestToolUnion struct {
	Type string `json:"type"`
	// Container holds the raw decoded "container" value; only map-shaped
	// payloads are interpreted (see GetContainerAsResponseRequestToolContainer).
	// NOTE(review): presumably the API also allows string container IDs here
	// — those are left untyped; confirm against the upstream schema.
	Container any `json:"container"`
}
50+
51+
func (u *ResponseRequestToolUnion) GetContainerAsResponseRequestToolContainer() *ResponseRequestToolContainer {
52+
if container, ok := u.Container.(map[string]interface{}); ok {
53+
cType := "auto"
54+
rawType, exists := container["type"]
55+
if !exists {
56+
cType = "auto"
57+
}
58+
if typeStr, ok := rawType.(string); ok {
59+
cType = typeStr
60+
}
61+
toolContainer := &ResponseRequestToolContainer{
62+
Type: cType,
63+
MemoryLimit: nil,
64+
}
65+
66+
if memoryLimit, exists := container["memory_limit"]; exists {
67+
if memoryLimitStr, ok := memoryLimit.(string); ok {
68+
toolContainer.MemoryLimit = &memoryLimitStr
69+
}
70+
}
71+
return toolContainer
72+
}
73+
return nil
3574
}

internal/server/web/proxy/middleware.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ type estimator interface {
6060
EstimateChatCompletionPromptTokenCounts(model string, r *goopenai.ChatCompletionRequest) (int, error)
6161
EstimateResponseApiTotalCost(model string, usage responsesOpenai.ResponseUsage) (float64, error)
6262
EstimateResponseApiToolCallsCost(tools []responsesOpenai.ToolUnion, model string) (float64, error)
63+
EstimateResponseApiToolCreateContainerCost(req *openai.ResponseRequest) (float64, error)
6364
}
6465

6566
type azureEstimator interface {
@@ -808,6 +809,8 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag
808809
return
809810
}
810811

812+
ginCtxSetResponsesRequest(c, responsesReq)
813+
811814
if gopointer.ToValueOrDefault(responsesReq.Background, false) {
812815
telemetry.Incr("bricksllm.proxy.get_middleware.background_not_allowed", nil, 1)
813816
JSON(c, http.StatusForbidden, "[BricksLLM] background is not allowed")
@@ -830,6 +833,25 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag
830833
return
831834
}
832835

836+
isCreateContainerTool := false
837+
var containerMemLimit string
838+
for _, tool := range responsesReq.Tools {
839+
if tool.GetContainerAsResponseRequestToolContainer() != nil {
840+
isCreateContainerTool = true
841+
containerMemLimit = tool.GetContainerAsResponseRequestToolContainer().GetMemoryLimit()
842+
break
843+
}
844+
}
845+
if isCreateContainerTool {
846+
_, ok := openai.OpenAiCodeInterpreterContainerCost[containerMemLimit]
847+
if !ok {
848+
telemetry.Incr("bricksllm.proxy.get_middleware.container_memory_limit_not_allowed", nil, 1)
849+
JSON(c, http.StatusForbidden, "[BricksLLM] container memory limit is not allowed")
850+
c.Abort()
851+
return
852+
}
853+
}
854+
833855
userId = gopointer.ToValueOrDefault(responsesReq.SafetyIdentifier, "")
834856
enrichedEvent.Request = responsesReq
835857
c.Set("model", gopointer.ToValueOrDefault(responsesReq.Model, ""))

internal/server/web/proxy/responses.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"net/http"
1212
"time"
1313

14+
"github.com/bricks-cloud/bricksllm/internal/provider/openai"
1415
"github.com/bricks-cloud/bricksllm/internal/telemetry"
1516
"github.com/bricks-cloud/bricksllm/internal/util"
1617
"github.com/gin-gonic/gin"
@@ -98,6 +99,13 @@ func getResponsesHandler(prod, private bool, client http.Client, e estimator) gi
9899
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_total_cost_error", nil, 1)
99100
logError(log, "error when estimating openai cost", prod, err)
100101
}
102+
reqResp, _ := ginCtxGetResponsesRequest(c)
103+
containerCost, err := e.EstimateResponseApiToolCreateContainerCost(reqResp)
104+
if err != nil {
105+
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_tool_container_cost_error", nil, 1)
106+
logError(log, "error when estimating openai tool container cost", prod, err)
107+
}
108+
cost += containerCost
101109
toolsCost, err := e.EstimateResponseApiToolCallsCost(resp.Tools, model)
102110
if err != nil {
103111
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_tool_calls_cost_error", nil, 1)
@@ -237,6 +245,13 @@ func getResponsesHandler(prod, private bool, client http.Client, e estimator) gi
237245
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_total_cost_error", nil, 1)
238246
logError(log, "error when estimating openai cost", prod, err)
239247
}
248+
reqResp, _ := ginCtxGetResponsesRequest(c)
249+
containerCost, err := e.EstimateResponseApiToolCreateContainerCost(reqResp)
250+
if err != nil {
251+
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_tool_container_cost_error", nil, 1)
252+
logError(log, "error when estimating openai tool container cost", prod, err)
253+
}
254+
streamCost += containerCost
240255
toolsCost, err := e.EstimateResponseApiToolCallsCost(responsesStreamResp.Response.Tools, model)
241256
if err != nil {
242257
telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.estimate_tool_calls_cost_error", nil, 1)
@@ -268,3 +283,21 @@ func int64ToInt(src int64) (int, error) {
268283
}
269284
return int(src), nil
270285
}
286+
287+
// ginCtxSetResponsesRequest stashes the parsed Responses API request on the
// gin context so downstream handlers can read it without re-parsing the body.
func ginCtxSetResponsesRequest(c *gin.Context, req *openai.ResponseRequest) {
	c.Set("responses_request", req)
}
290+
291+
func ginCtxGetResponsesRequest(c *gin.Context) (*openai.ResponseRequest, error) {
292+
reqAny, exists := c.Get("responses_request")
293+
if !exists {
294+
return nil, errors.New("responses request not found in gin context")
295+
}
296+
297+
req, ok := reqAny.(*openai.ResponseRequest)
298+
if !ok {
299+
return nil, errors.New("responses request in gin context has invalid type")
300+
}
301+
302+
return req, nil
303+
}

0 commit comments

Comments
 (0)