feat: update examples to use a public LoRA adapter

biswapanda · biswapanda · commit aac34415d5ec · 2025-12-08T14:38:14.000-08:00
diff --git a/examples/backends/vllm/launch/lora/agg_lora_s3.sh b/examples/backends/vllm/launch/lora/agg_lora_s3.sh
@@ -35,34 +35,29 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
     python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager  \
     --connector none  \
     --enable-lora  \
-    --max-lora-rank 32
+    --max-lora-rank 64
 
 ################################## Example Usage ##################################
 
 # Check available models
 curl http://localhost:8000/v1/models | jq .
 
 # Load LoRA using s3 uri
-curl -X POST http://localhost:8081/v1/loras \
-  -H "Content-Type: application/json" \
-  -d '{
-    "lora_name": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
-    "source": {
-      "uri": "s3://my-loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA"
-    }
-  }'
+curl -s  -X POST http://localhost:8081/v1/loras \
+       -H "Content-Type: application/json" \
+       -d '{"lora_name": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
+     "source": {"uri": "s3://my-loras/codelion/Qwen3-0.6B-accuracy-recovery-lora"}}' | jq .
 
 # Test LoRA inference
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
-    "messages": [{"role": "user", "content": "Solve (x*x - x + 1 = 0) for x"}],
+    "model": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
+    "messages": [{"role": "user", "content": "What is deep learning?"}],
     "max_tokens": 300,
     "temperature": 0.0
   }'
 
-# Find the minimum possible value of \( x^2 + y^2 \) given that \( x \) and \( y \) are real numbers satisfying \( xy(x^2 - y^2) = x^2 + y^2 \) and \( x \neq 0 \)
 # Test base model inference (for comparison)
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -74,4 +69,4 @@ curl -X POST http://localhost:8000/v1/chat/completions \
   }'
 
 # Unload LoRA
-curl -X DELETE http://localhost:8081/v1/loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA
+curl -X DELETE http://localhost:8081/v1/loras/codelion/Qwen3-0.6B-accuracy-recovery-lora
diff --git a/examples/backends/vllm/launch/lora/setup_minio.sh b/examples/backends/vllm/launch/lora/setup_minio.sh
@@ -20,8 +20,8 @@ MINIO_SECRET_KEY="minioadmin"
 BUCKET_NAME="my-loras"
 
 # Default LoRA to download (can be overridden)
-HF_LORA_REPO="${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
-LORA_NAME="${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
+HF_LORA_REPO="${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
+LORA_NAME="${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
 # TEMP_DIR will be created using mktemp when needed
 TEMP_DIR=""
 
@@ -63,8 +63,8 @@ show_help() {
     echo "  --help, -h    Show this help message"
     echo ""
     echo "Environment Variables:"
-    echo "  HF_LORA_REPO  Hugging Face repository (default: ${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
-    echo "  LORA_NAME     Local name for the LoRA (default: ${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
+    echo "  HF_LORA_REPO  Hugging Face repository (default: ${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
+    echo "  LORA_NAME     Local name for the LoRA (default: ${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
     echo ""
     echo "Examples:"
     echo "  $0                                    # Full setup"
@@ -173,6 +173,7 @@ download_lora_from_hf() {
 
     print_success "LoRA downloaded to ${TEMP_DIR}"
 
+    rm -rf "${TEMP_DIR}/.cache"
     # List downloaded files
     echo "Downloaded files:"
     ls -lh "${TEMP_DIR}"