Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y pkg-config python3-dev python3-pip
make install-dependencies
make download-tokenizer
make download-zmq
make install-python-deps
pip3 install transformers --break-system-packages

- name: Run lint checks
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ pkg/dataset/.llm-d
pkg/llm-d-inference-sim/tests-tmp/
pkg/llm-d-inference-sim/.llm-d/
.llm-d/
.venv
185 changes: 140 additions & 45 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ IMAGE_REGISTRY ?= ghcr.io/llm-d
IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
SIM_TAG ?= dev
IMG = $(IMAGE_TAG_BASE):$(SIM_TAG)
POD_IP ?= pod
export POD_IP

ifeq ($(TARGETOS),darwin)
ifeq ($(TARGETARCH),amd64)
Expand Down Expand Up @@ -60,20 +62,61 @@ export PKG_CONFIG_PATH=/usr/lib/pkgconfig
PYTHON_VERSION := 3.12

# Unified Python configuration detection. This block runs once.
PYTHON_CONFIG ?= $(shell command -v python$(PYTHON_VERSION)-config || command -v python3-config)
# It prioritizes python-config, then pkg-config, for reliability.
# Detect the host OS once; the branches below pick the Python flag source per OS.
# Outputs of this block: PYTHON_CONFIG, PYTHON_CFLAGS, PYTHON_LDFLAGS, PYTHON_LIBS.
# NOTE(review): the $(error ...) calls below are evaluated while the Makefile is
# parsed, so a failed detection aborts make before any target runs - confirm
# that this is intended for targets that do not need Python.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
# macOS: Find Homebrew's python-config script for the most reliable flags.
# BREW_PREFIX is empty when brew is not on PATH; PYTHON_CONFIG then resolves to
# /bin/python$(PYTHON_VERSION)-config and the probe below is expected to fail.
BREW_PREFIX := $(shell command -v brew >/dev/null 2>&1 && brew --prefix python@$(PYTHON_VERSION) 2>/dev/null)
PYTHON_CONFIG := $(BREW_PREFIX)/bin/python$(PYTHON_VERSION)-config
# Probe: a non-empty --cflags output means the script exists and runs.
ifneq ($(shell $(PYTHON_CONFIG) --cflags 2>/dev/null),)
PYTHON_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags)
# Use --ldflags --embed to get all necessary flags for linking
PYTHON_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
# Left empty: everything needed for linking is carried by PYTHON_LDFLAGS.
PYTHON_LIBS :=
else
$(error "Could not execute 'python$(PYTHON_VERSION)-config' from Homebrew. Please ensure Python is installed correctly with: 'brew install python@$(PYTHON_VERSION)'")
endif
else ifeq ($(UNAME_S),Linux)
# Linux: Use standard system tools to find flags.
# Prefer the version-specific python$(PYTHON_VERSION)-config, else fall back to python3-config.
PYTHON_CONFIG := $(shell command -v python$(PYTHON_VERSION)-config || command -v python3-config)
# Same probe as on macOS; it also yields empty when PYTHON_CONFIG itself is empty.
ifneq ($(shell $(PYTHON_CONFIG) --cflags 2>/dev/null),)
# Use python-config if available and correct
PYTHON_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags)
PYTHON_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
PYTHON_LIBS :=
else ifneq ($(shell pkg-config --cflags python-$(PYTHON_VERSION) 2>/dev/null),)
# Fallback to pkg-config
# NOTE(review): this queries the "python-$(PYTHON_VERSION)" module; linking an
# embedded interpreter may need "python-$(PYTHON_VERSION)-embed" - TODO confirm.
PYTHON_CFLAGS := $(shell pkg-config --cflags python-$(PYTHON_VERSION))
PYTHON_LDFLAGS := $(shell pkg-config --libs python-$(PYTHON_VERSION))
PYTHON_LIBS :=
else
$(error "Python $(PYTHON_VERSION) development headers not found. Please install with: 'sudo apt install python$(PYTHON_VERSION)-dev' or 'sudo dnf install python$(PYTHON_VERSION)-devel'")
endif
else
# Any OS other than Darwin or Linux is rejected outright.
$(error "Unsupported OS: $(UNAME_S)")
endif

# Final CGO flags with all dependencies
CGO_CFLAGS_FINAL := $(PYTHON_CFLAGS) -Ilib
CGO_LDFLAGS_FINAL := $(PYTHON_LDFLAGS) $(PYTHON_LIBS) -Llib -ltokenizers -ldl -lm

VENV_DIR ?= $(shell pwd)/.venv
VENV_BIN := $(VENV_DIR)/bin
VENV_SRC := $(VENV_DIR)/python

CGO_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags --embed)
CGO_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
PYTHON_EXE := $(shell command -v python$(PYTHON_VERSION) || command -v python3)

GOMODCACHE := $(shell go env GOMODCACHE)
KV_CACHE_MGR_VERSION := $(shell go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager)
KV_CACHE_MGR_PATH := $(GOMODCACHE)/github.com/llm-d/llm-d-kv-cache-manager@$(KV_CACHE_MGR_VERSION)/pkg/preprocessing/chat_completions
export PYTHONPATH := $(KV_CACHE_MGR_PATH):$(PYTHONPATH)

CPATH := $(PYTHON_INCLUDE):$(CPATH)
# Common environment variables for Go tests and builds
export CGO_ENABLED=1
export CGO_CFLAGS=$(CGO_CFLAGS_FINAL)
export CGO_LDFLAGS=$(CGO_LDFLAGS_FINAL)
export PYTHONPATH=$(VENV_SRC):$(VENV_DIR)/lib/python$(PYTHON_VERSION)/site-packages

GO_LDFLAGS := -extldflags '-L$(shell pwd)/lib $(LDFLAGS) $(CGO_LDFLAGS)'
CGO_ENABLED=1
TOKENIZER_LIB = lib/libtokenizers.a
# Extract TOKENIZER_VERSION from Dockerfile
TOKENIZER_VERSION := $(shell grep '^ARG TOKENIZER_VERSION=' Dockerfile | cut -d'=' -f2)
Expand All @@ -84,7 +127,11 @@ $(TOKENIZER_LIB):
## Download the HuggingFace tokenizer bindings.
@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
mkdir -p lib
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TOKENIZER_ARCH).tar.gz | tar -xz -C lib
if [ "$(TARGETOS)" = "darwin" ] && [ "$(TARGETARCH)" = "amd64" ]; then \
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-x86_64.tar.gz | tar -xz -C lib; \
else \
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib; \
fi
ranlib lib/*.a

##@ Development
Expand All @@ -101,13 +148,18 @@ format: ## Format Go source files
@gofmt -l -w $(SRC)

.PHONY: test
test: $(GINKGO) install-dependencies ## Run tests
@printf "\033[33;1m==== Running tests ====\033[0m\n"
ifdef GINKGO_FOCUS
CGO_ENABLED=1 CGO_CFLAGS="$(CGO_CFLAGS)" $(GINKGO) -ldflags="$(GO_LDFLAGS)" -v -r -- -ginkgo.v -ginkgo.focus="$(GINKGO_FOCUS)"
else
CGO_ENABLED=1 CGO_CFLAGS="$(CGO_CFLAGS)" $(GINKGO) -ldflags="$(GO_LDFLAGS)" -v -r $(TEST_PKG)
endif
test: download-tokenizer install-python-deps ## Run unit tests
@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
if [ -n "$(GINKGO_FOCUS)" ] && [ -z "$(GINKGO_FOCUS_PKG)" ]; then \
echo "Error: GINKGO_FOCUS is defined without GINKGO_FOCUS_PKG. Both required or neither."; \
exit 1; \
elif [ -n "$(GINKGO_FOCUS)$(GINKGO_FOCUS_PKG)" ]; then \
echo "Running specific tests"; \
go test -v $(GINKGO_FOCUS_PKG) $(if $(GINKGO_FOCUS),-ginkgo.focus="$(GINKGO_FOCUS)",); \
else \
echo "Running all tests"; \
go test -v ./pkg/...; \
fi

.PHONY: post-deploy-test
post-deploy-test: ## Run post deployment tests
Expand All @@ -122,10 +174,15 @@ lint: $(GOLANGCI_LINT) ## Run lint
##@ Build

.PHONY: build
build: check-go install-dependencies
build: check-go download-tokenizer install-python-deps download-zmq
@printf "\033[33;1m==== Building ====\033[0m\n"
CGO_CFLAGS="$(CGO_CFLAGS)" go build -ldflags="$(GO_LDFLAGS)" -o $(LOCALBIN)/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go

# Launch the already-built simulator binary from ./bin inside the project's
# Python virtual environment. Extra command-line flags are passed via ARGS,
# e.g. `make run ARGS='--port 8000'`. The binary is not rebuilt here (the
# `build` prerequisite is commented out in the rule header).
.PHONY: run
run: install-python-deps # build ## Run the application locally
	@printf "\033[33;1m==== Running application ====\033[0m\n"
	. $(VENV_BIN)/activate && ./bin/$(PROJECT_NAME) $(ARGS)

##@ Container Build/Push

.PHONY: image-build
Expand Down Expand Up @@ -234,43 +291,81 @@ print-project-name: ## Print the current project name
install-hooks: ## Install git hooks
git config core.hooksPath hooks

# Validate the Python toolchain and print what was detected.
# Fails (exit 1) when:
#   - no interpreter was found (PYTHON_EXE is empty),
#   - the interpreter's major.minor differs from PYTHON_VERSION,
#   - no compiler flags were detected (PYTHON_CFLAGS is empty).
.PHONY: detect-python
detect-python: ## Detects Python and prints the configuration.
	@printf "\033[33;1m==== Python Configuration ====\033[0m\n"
	@if [ -z "$(PYTHON_EXE)" ]; then \
		echo "ERROR: Python 3 not found in PATH."; \
		exit 1; \
	fi
	@# Verify the version of the found python executable using its exit code.
	@# PYTHON_VERSION ("major.minor") is split with make's built-in text
	@# functions, so no extra shell processes are spawned for the comparison tuple.
	@if ! $(PYTHON_EXE) -c "import sys; sys.exit(0 if sys.version_info[:2] == ($(word 1,$(subst ., ,$(PYTHON_VERSION))), $(word 2,$(subst ., ,$(PYTHON_VERSION)))) else 1)"; then \
		echo "ERROR: Found Python at '$(PYTHON_EXE)' but it is not version $(PYTHON_VERSION)."; \
		echo "Please ensure 'python$(PYTHON_VERSION)' or a compatible 'python3' is in your PATH."; \
		exit 1; \
	fi
	@echo "Python executable: $(PYTHON_EXE) ($$($(PYTHON_EXE) --version))"
	@echo "Python CFLAGS:     $(PYTHON_CFLAGS)"
	@echo "Python LDFLAGS:    $(PYTHON_LDFLAGS)"
	@if [ -z "$(PYTHON_CFLAGS)" ]; then \
		echo "ERROR: Python development headers not found. See installation instructions above."; \
		exit 1; \
	fi
	@printf "\033[33;1m==============================\033[0m\n"

# Create (if needed) the virtual environment in VENV_DIR, copy the Python
# helper files shipped with the kv-cache-manager Go module into VENV_SRC,
# install their requirements and verify that `transformers` is importable.
.PHONY: install-python-deps
install-python-deps: detect-python ## Sets up the Python virtual environment and installs dependencies.
	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
	@if [ ! -f "$(VENV_BIN)/pip" ]; then \
		echo "Creating virtual environment..."; \
		$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
			echo "ERROR: Failed to create virtual environment."; \
			echo "Your Python installation may be missing the 'venv' module."; \
			echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \
			exit 1; \
		}; \
	fi
	@# Always ensure the source directory exists: VENV_DIR may point at a
	@# pre-existing venv (pip already present), in which case the creation
	@# branch above is skipped and the cp commands below would otherwise fail.
	@mkdir -p $(VENV_SRC)
	@echo "Upgrading pip and installing dependencies..."
	@$(VENV_BIN)/pip install --upgrade pip
	cp $(KV_CACHE_MGR_PATH)/requirements.txt $(VENV_SRC)/
	cp $(KV_CACHE_MGR_PATH)/render_jinja_template_wrapper.py $(VENV_SRC)/
	@# Make the copies owner-writable; presumably the sources under GOMODCACHE
	@# are read-only, which would block overwriting them on a later run.
	chmod u+w $(VENV_SRC)/*
	@$(VENV_BIN)/pip install -r $(VENV_SRC)/requirements.txt
	@echo "Verifying transformers installation..."
	@$(VENV_BIN)/python -c "import transformers; print('✅ Transformers version ' + transformers.__version__ + ' installed.')" || { \
		echo "ERROR: transformers library not properly installed in venv."; \
		exit 1; \
	}

##@ ZMQ Setup

.PHONY: install-dependencies
install-dependencies: download-tokenizer ## Install development dependencies based on OS/ARCH
@echo "Checking and installing development dependencies..."
@if [ "$(TARGETOS)" = "linux" ]; then \
if [ -x "$$(command -v apt)" ]; then \
if ! dpkg -s libzmq3-dev >/dev/null 2>&1 || ! dpkg -s g++ >/dev/null 2>&1; then \
echo "Installing dependencies with apt..."; \
sudo apt-get update && sudo apt-get install -y libzmq3-dev g++; \
else \
echo "✅ ZMQ and g++ are already installed."; \
fi; \
elif [ -x "$$(command -v dnf)" ]; then \
if ! dnf -q list installed zeromq-devel >/dev/null 2>&1 || ! dnf -q list installed gcc-c++ >/dev/null 2>&1; then \
echo "Installing dependencies with dnf..."; \
sudo dnf install -y zeromq-devel gcc-c++; \
.PHONY: download-zmq
download-zmq: ## Install ZMQ dependencies based on OS/ARCH
@echo "Checking if ZMQ is already installed..."
@if pkg-config --exists libzmq; then \
echo "✅ ZMQ is already installed."; \
else \
echo "Installing ZMQ dependencies..."; \
if [ "$(TARGETOS)" = "linux" ]; then \
if [ -x "$$(command -v apt)" ]; then \
apt update && apt install -y libzmq3-dev; \
elif [ -x "$$(command -v dnf)" ]; then \
dnf install -y zeromq-devel; \
else \
echo "✅ ZMQ and gcc-c++ are already installed."; \
echo "Unsupported Linux package manager. Install libzmq manually."; \
exit 1; \
fi; \
else \
echo "Unsupported Linux package manager. Install libzmq and g++/gcc-c++ manually."; \
exit 1; \
fi; \
elif [ "$(TARGETOS)" = "darwin" ]; then \
if [ -x "$$(command -v brew)" ]; then \
if ! brew list zeromq pkg-config >/dev/null 2>&1; then \
echo "Installing dependencies with brew..."; \
brew install zeromq pkg-config; \
elif [ "$(TARGETOS)" = "darwin" ]; then \
if [ -x "$$(command -v brew)" ]; then \
brew install zeromq; \
else \
echo "✅ ZeroMQ and pkgconf are already installed."; \
echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \
exit 1; \
fi; \
else \
echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \
echo "Unsupported OS: $(TARGETOS). Install libzmq manually - check https://zeromq.org/download/ for guidance."; \
exit 1; \
fi; \
else \
echo "Unsupported OS: $(TARGETOS). Install development dependencies manually."; \
exit 1; \
echo "✅ ZMQ dependencies installed."; \
fi
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ make build
### Running
To run the vLLM simulator in a standalone test environment, run:
```bash
./bin/llm-d-inference-sim --model my_model --port 8000
make run ARGS='--model="Qwen/Qwen2.5-1.5B-Instruct" --port 8000 -v=4'
```

## Kubernetes testing
Expand Down
15 changes: 6 additions & 9 deletions pkg/kv-cache/kv_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"github.com/go-logr/logr"
"github.com/llm-d/llm-d-inference-sim/pkg/common"
"github.com/llm-d/llm-d-inference-sim/pkg/common/logging"
openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
)
Expand All @@ -49,6 +48,7 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger, usageCha
if err != nil {
return nil, fmt.Errorf("failed to create block cache: %w", err)
}

return &KVCacheHelper{
tokenizer: tokenizer,
tokensProcessor: tokensProcessor,
Expand All @@ -71,18 +71,16 @@ func (h *KVCacheHelper) Activate() {
h.blockCache.activate()
}

func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error {
// OnRequestStart called when request received, simulates KV-cache block management
// Returns number of tokens found in the cache.
func (h *KVCacheHelper) OnRequestStart(prompt, modelName, requestID string) (int, error) {
h.logger.V(logging.TRACE).Info("KV cache - process request")

prompt := vllmReq.GetPrompt()
modelName := vllmReq.GetModel()
requestID := vllmReq.GetRequestID()

// tokenize the input
tokens, _, err := h.tokenizer.Encode(prompt, modelName)
if err != nil {
h.logger.Error(err, "prompt tokenization failed")
return err
return 0, err
}

// get block keys
Expand All @@ -95,8 +93,7 @@ func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest
}

nBlocksAlreadyInCache, err := h.blockCache.startRequest(requestID, blockHashes)
vllmReq.SetNumberOfCachedPromptTokens(nBlocksAlreadyInCache * h.blockSize)
return err
return nBlocksAlreadyInCache * h.blockSize, err
}

func (h *KVCacheHelper) OnRequestEnd(requestID string) error {
Expand Down
Loading
Loading