
Commit 303cb64

Document Qwen3 14B instead of Qwen2.5 14B in non-tutorial examples (#437)

* Update docs to reference Qwen3 14B Instruct for most examples
* Update notebooks

1 parent f51e1e4 commit 303cb64

File tree

10 files changed: +38 −30 lines

README.md

Lines changed: 3 additions & 3 deletions

@@ -42,7 +42,7 @@ from art.serverless.backend import ServerlessBackend
 model = art.TrainableModel(
     project="voice-agent",
     name="agent-001",
-    base_model="Qwen/Qwen2.5-14B-Instruct"
+    base_model="OpenPipe/Qwen3-14B-Instruct"
 )

 backend = ServerlessBackend(
@@ -62,8 +62,8 @@ ART is an open-source RL framework that improves agent reliability by allowing L
 | Agent Task | Example Notebook | Description | Comparative Performance |
 | --- | --- | --- | --- |
-| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 14B learns to search emails using RULER | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/email_agent/accuracy-training-progress.svg" height="72"> [benchmarks](/dev/art-e/art_e/evaluate/display_benchmarks.ipynb) |
-| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 14B learns to play 2048 | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/2048/accuracy-training-progress.svg" height="72"> [benchmarks](/examples/2048/display_benchmarks.ipynb) |
+| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen3 14B learns to search emails using RULER | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/email_agent/accuracy-training-progress.svg" height="72"> [benchmarks](/dev/art-e/art_e/evaluate/display_benchmarks.ipynb) |
+| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen3 14B learns to play 2048 | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/2048/accuracy-training-progress.svg" height="72"> [benchmarks](/examples/2048/display_benchmarks.ipynb) |
 | **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon] |
 | **MCP•RL** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] |
 | **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] |
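The README hunk above swaps only the `base_model` while keeping the rest of the setup flow (`art.TrainableModel` plus `ServerlessBackend`, followed by registration) intact. A minimal stand-in sketch of that flow, using a plain dataclass and a hypothetical `register` coroutine rather than the real `art` package:

```python
import asyncio
from dataclasses import dataclass


@dataclass
class TrainableModelConfig:
    """Mirrors the fields shown in the README snippet (illustrative only)."""
    project: str
    name: str
    base_model: str


async def register(config: TrainableModelConfig) -> str:
    # Stand-in for `await model.register(backend)`: in ART this would
    # provision the model on the backend; here we just echo an identifier.
    return f"{config.project}/{config.name} ({config.base_model})"


config = TrainableModelConfig(
    project="voice-agent",
    name="agent-001",
    base_model="OpenPipe/Qwen3-14B-Instruct",
)
print(asyncio.run(register(config)))
# → voice-agent/agent-001 (OpenPipe/Qwen3-14B-Instruct)
```

The base model string is the only field the commit changes; everything else in the configuration is untouched.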

docs/features/checkpoint-deletion.mdx

Lines changed: 2 additions & 2 deletions

@@ -17,7 +17,7 @@ from art.serverless.backend import ServerlessBackend
 model = art.TrainableModel(
     name="agent-001",
     project="checkpoint-deletion-demo",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )
 backend = ServerlessBackend()
 # in order for the model to know where to look for its existing checkpoints,
@@ -55,7 +55,7 @@ TRAINING_STEPS = 50
 model = art.TrainableModel(
     name="agent-001",
     project="checkpoint-deletion-demo",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )
 backend = ServerlessBackend()
 await model.register(backend)
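The checkpoint-deletion doc above notes that a re-registered model must first locate its existing checkpoints before any can be pruned. One plausible retention policy, sketched here in plain Python (the `prune_checkpoints` helper and its step-to-reward mapping are hypothetical illustrations, not ART's actual deletion API), keeps the latest step plus the best-scoring step and marks everything else for deletion:

```python
def prune_checkpoints(checkpoints: dict[int, float]) -> list[int]:
    """Return the steps safe to delete, keeping the latest step and the
    highest-reward step. `checkpoints` maps step -> eval reward.
    (Hypothetical helper; ART's real deletion behavior may differ.)"""
    if not checkpoints:
        return []
    latest = max(checkpoints)                          # most recent step
    best = max(checkpoints, key=checkpoints.__getitem__)  # highest reward
    keep = {latest, best}
    return sorted(step for step in checkpoints if step not in keep)


# Steps 10..50 with eval rewards; step 30 scored best, step 50 is latest.
rewards = {10: 0.41, 20: 0.55, 30: 0.71, 40: 0.63, 50: 0.68}
print(prune_checkpoints(rewards))
# → [10, 20, 40]
```

Keeping both the latest and the best checkpoint means training can resume from where it left off while the strongest model so far is never discarded.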

docs/features/checkpoint-forking.mdx

Lines changed: 3 additions & 3 deletions

@@ -36,7 +36,7 @@ async def train():
 model = art.TrainableModel(
     name="my-model-v2",
     project="my-project",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 # Copy the checkpoint from another model
@@ -104,14 +104,14 @@ Here's a practical example of using checkpoint forking to test a lower learning
 base_model = art.TrainableModel(
     name="summarizer-base",
     project="experiments",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 # Fork at step 1000 to try lower learning rate
 low_lr_model = art.TrainableModel(
     name="summarizer-low-lr",
     project="experiments",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 async def experiment():
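The forking doc above copies a checkpoint from one model into another so a hyperparameter variant (here, a lower learning rate) can continue from the same weights. The core semantics can be sketched with plain dictionaries (the `fork_checkpoint` function and the `runs` structure are illustrative stand-ins, not ART's checkpoint-copy API):

```python
from copy import deepcopy


def fork_checkpoint(runs: dict, src: str, dst: str, step: int) -> None:
    """Copy `src`'s checkpoint at `step` into `dst`, so training can
    continue under new hyperparameters without mutating the original.
    (Illustrative stand-in; ART's real call signature may differ.)"""
    runs.setdefault(dst, {})[step] = deepcopy(runs[src][step])


# Base run has a checkpoint at step 1000.
runs = {"summarizer-base": {1000: {"weights": "...", "lr": 1.2e-5}}}

# Fork at step 1000, then lower the learning rate on the fork only.
fork_checkpoint(runs, "summarizer-base", "summarizer-low-lr", step=1000)
runs["summarizer-low-lr"][1000]["lr"] = 1.2e-6

print(sorted(runs))
# → ['summarizer-base', 'summarizer-low-lr']
```

The deep copy matters: editing the fork's hyperparameters must leave the base run's checkpoint untouched, which is exactly what lets the two learning rates be compared fairly from an identical starting point.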

docs/features/mcp-rl.mdx

Lines changed: 2 additions & 2 deletions

@@ -104,8 +104,8 @@ from art.rewards import ruler_score_group
 from art import gather_trajectory_groups

 # Initialize the model
-model = art.RemoteModel(
-    model="Qwen/Qwen2.5-3B-Instruct",
+model = art.TrainableModel(
+    model="OpenPipe/Qwen3-14B-Instruct",
     openrouter_api_key="your_openrouter_key"
 )
docs/fundamentals/art-client.mdx

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ model = art.TrainableModel(
     # for a given task to consistently group metrics
     project="my-agentic-task",
     # the model that you want to train from
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )
 ```

docs/getting-started/installation-setup.mdx

Lines changed: 3 additions & 3 deletions

@@ -31,7 +31,7 @@ backend = LocalBackend()
 model = TrainableModel(
     name="agent-001",
     project="my-agentic-task",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 await model.register(backend)
@@ -57,7 +57,7 @@ backend = ServerlessBackend()
 model = TrainableModel(
     name="agent-001",
     project="my-agentic-task",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 await model.register(backend)
@@ -87,7 +87,7 @@ backend = await SkyPilotBackend.initialize_cluster(
 model = TrainableModel(
     name="agent-001",
     project="my-agentic-task",
-    base_model="Qwen/Qwen2.5-14B-Instruct",
+    base_model="OpenPipe/Qwen3-14B-Instruct",
 )

 await model.register(backend)

docs/getting-started/notebooks.mdx

Lines changed: 8 additions & 8 deletions

@@ -9,13 +9,13 @@ icon: "book"

 | Agent Task | Notebook | Description | Performance |
 | --- | --- | --- | --- |
-| **ART•E [Serverless]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 14B learns to search emails using RULER | <a href="https://github.com/OpenPipe/ART/blob/main/dev/art-e/art_e/evaluate/display_benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/email_agent/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
-| **2048 [Serverless]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 14B learns to play 2048 | <a href="https://github.com/OpenPipe/ART/blob/main/examples/2048/display_benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/2048/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
-| **ART•E LangGraph** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon] |
-| **MCP•RL** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] |
-| **Temporal Clue** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] |
-| **Tic Tac Toe** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen 2.5 3B learns to play Tic Tac Toe | <a href="https://github.com/OpenPipe/ART/blob/main/examples/tic_tac_toe/display-benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/tic-tac-toe-local/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
-| **Codenames** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen 2.5 3B learns to play Codenames | <a href="https://github.com/OpenPipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/codenames/win_rate_over_time.png" width="72" style={{margin: "0"}} /></a> |
-| **AutoRL [RULER]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen 2.5 7B to master any task | [Link coming soon] |
+| **ART•E [Serverless]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen3 14B learns to search emails using RULER | <a href="https://github.com/OpenPipe/ART/blob/main/dev/art-e/art_e/evaluate/display_benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/email_agent/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
+| **2048 [Serverless]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen3 14B learns to play 2048 | <a href="https://github.com/OpenPipe/ART/blob/main/examples/2048/display_benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/2048/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
+| **ART•E LangGraph** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen2.5 7B learns to search emails using LangGraph | [Link coming soon] |
+| **MCP•RL** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen2.5 3B masters the NWS MCP server | [Link coming soon] |
+| **Temporal Clue** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen2.5 7B learns to solve Temporal Clue | [Link coming soon] |
+| **Tic Tac Toe** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen2.5 3B learns to play Tic Tac Toe | <a href="https://github.com/OpenPipe/ART/blob/main/examples/tic_tac_toe/display-benchmarks.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/tic-tac-toe-local/accuracy-training-progress.svg" width="72" style={{margin: "0"}} /></a> |
+| **Codenames** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen2.5 3B learns to play Codenames | <a href="https://github.com/OpenPipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb"><img src="https://github.com/OpenPipe/ART/raw/main/assets/benchmarks/codenames/win_rate_over_time.png" width="72" style={{margin: "0"}} /></a> |
+| **AutoRL [RULER]** | [🏋️&nbsp;Train&nbsp;agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen2.5 7B to master any task | [Link coming soon] |

 </div>

docs/getting-started/quick-start.mdx

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ description: "Get started with ART in a few quick steps."
 icon: "forward"
 ---

-In this Quick Start tutorial, we'll be training Qwen 2.5 14B to play [2048](https://play2048.co/), a simple game that requires forward planning and basic math skills.
+In this Quick Start tutorial, we'll be training Qwen3 14B Instruct to play [2048](https://play2048.co/), a simple game that requires forward planning and basic math skills.

 <Info>
docs/resources/models.mdx

Lines changed: 13 additions & 5 deletions

@@ -5,13 +5,21 @@ description: "Train open source models on ART."
 icon: "robot"
 ---

-## Recommended Models
+## Serverless Models

-- [Qwen 2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct)
+We currently only support the following model for serverless training. We are actively adding support for both larger and smaller models. If there's a particular model you'd like to see serverless support for, please send a request to [email protected].
+
+- [OpenPipe Qwen 3 14B Instruct](https://huggingface.co/OpenPipe/Qwen3-14B-Instruct)
   - Good balance of performance and size. Has support for tool calling and generally trains well. This is our recommended model for users new to RL.
-- [Qwen 2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
+
+## Recommended Local Models
+
+If you're developing locally or on your own hardware, here are a couple of other models you could try in addition to the recommended serverless list.
+
+- [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
   - Less capable than 14B, but smaller and faster
-- [Qwen 2.5 32B Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)
+- [Qwen2.5 32B Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)
   - More capable than 14B, but larger and slower

 ## More Models
@@ -24,7 +32,7 @@ Here are additional models that we've tested and found to work well with ART:
 - [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)
 - [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)
 - [Llama 3.3 70B Instruct](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
-- [Qwen 2.5 72B Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
+- [Qwen2.5 72B Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
 - Additionally, the [Qwen 3](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) family of models is well supported for single-turn workflows. For multi-turn workflows the Qwen 3 chat template removes the `<think>` tokens from previous turns, which makes training more complicated. It is still possible to use for multi-turn workflows by splitting each turn into a separate message history with our `additional_histories` trajectory parameter (see [Additional Histories](/features/additional-histories)).

 If you're curious about a model that is not listed above, ask in the Discord [#support](https://discord.com/channels/1359674493949448375/1359674622965973185) channel.
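The models doc above mentions that the Qwen 3 chat template drops `<think>` tokens from earlier turns, and that the workaround is to split a multi-turn rollout into one message history per turn via `additional_histories`. The splitting idea can be sketched in plain Python (the `split_into_histories` function is a hypothetical illustration; ART's actual trajectory format may differ):

```python
def split_into_histories(messages: list[dict]) -> list[list[dict]]:
    """Split one multi-turn chat into per-turn histories: one history per
    assistant message, each containing everything up to and including that
    reply. Each assistant turn then keeps its own `<think>` content, which
    the Qwen 3 template would otherwise strip from previous turns.
    (Illustrative sketch of the `additional_histories` idea.)"""
    histories = []
    for i, msg in enumerate(messages):
        if msg["role"] == "assistant":
            histories.append(messages[: i + 1])
    return histories


chat = [
    {"role": "user", "content": "What is 2+2?"},
    {"role": "assistant", "content": "<think>2+2=4</think>4"},
    {"role": "user", "content": "And doubled?"},
    {"role": "assistant", "content": "<think>4*2=8</think>8"},
]
print(len(split_into_histories(chat)))  # one history per assistant turn
# → 2
```

Because each history ends at its own assistant reply, the reasoning tokens for that turn are still present when the turn is used as a training example, sidestepping the template's `<think>`-stripping behavior.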
