Skip to content

Commit b2cfe57

Browse files
corbt and Claude authored
feat: Add OpenEnv integration example (#445)
* feat: Add OpenEnv integration example with echo environment Demonstrates how to use OpenEnv environments with ART for training. The example shows a simple echo environment that rewards longer messages. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]> * docs: Add OpenEnv integration documentation - Add concise documentation page for OpenEnv integration - Link to OpenEnv GitHub repository - Emphasize automatic integration due to ART's flexible architecture - Include complete code example demonstrating usage 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]> * chore: Add package-lock.json to docs .gitignore --------- Co-authored-by: Claude <[email protected]>
1 parent 0036512 commit b2cfe57

File tree

4 files changed

+198
-2
lines changed

4 files changed

+198
-2
lines changed

docs/.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
node_modules
1+
node_modules
2+
package-lock.json

docs/docs.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@
7272
{
7373
"group": "Integrations",
7474
"pages": [
75-
"integrations/langgraph-integration"
75+
"integrations/langgraph-integration",
76+
"integrations/openenv-integration"
7677
]
7778
},
7879
{
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
---
2+
title: "🌍 OpenEnv"
3+
description: "Train AI agents in isolated execution environments using OpenEnv with ART's reinforcement learning"
4+
---
5+
6+
# OpenEnv Integration
7+
8+
[OpenEnv](https://github.com/meta-pytorch/OpenEnv) provides a standard for interacting with agentic execution environments via simple Gymnasium-style APIs, making it easy to create reproducible training scenarios for code generation, tool usage, and other complex tasks. Because ART is unopinionated about the shape of your environment and rollout function, integration with OpenEnv is automatic - you can use any OpenEnv environment with ART without any special adapters or configuration.
9+
10+
## Code Example
11+
12+
Here's a complete example showing how to train an agent using OpenEnv's echo environment with ART:
13+
14+
```python
15+
import asyncio
16+
from datetime import datetime
17+
18+
import art
19+
from art.serverless.backend import ServerlessBackend
20+
from dotenv import load_dotenv
21+
from envs.echo_env import EchoAction, EchoEnv
22+
import weave
23+
24+
PROMPT = "Use at most 100 tokens; maximize the total character length of the output."
25+
NUM_STEPS = 50
26+
ROLLOUTS_PER_GROUP = 4
27+
28+
29+
# One rollout: a single agent/environment interaction recorded as a trajectory.
async def rollout(model: art.TrainableModel, env_client: EchoEnv) -> art.Trajectory:
    """Run one episode against the echo environment and return the trajectory.

    The model produces a single message; the environment's reward for that
    message becomes the trajectory's reward.
    """
    # env_client.reset is a blocking call, so run it off the event loop.
    await asyncio.to_thread(env_client.reset)

    # The trajectory accumulates the conversation plus the final reward.
    trajectory = art.Trajectory(
        messages_and_choices=[{"role": "system", "content": PROMPT}],
        reward=0.0,
    )

    # Ask the model being trained for its next (and only) action.
    completion = await model.openai_client().chat.completions.create(
        model=model.inference_model_name,
        messages=trajectory.messages(),
        max_completion_tokens=100,
        timeout=30,
    )
    choice = completion.choices[0]
    reply = (choice.message.content or "").strip()

    # Step the environment with the model's message and collect the result.
    result = await asyncio.to_thread(env_client.step, EchoAction(message=reply))

    # Record the sampled choice so its tokens are available for training,
    # along with the reward the environment assigned to it.
    trajectory.messages_and_choices.append(choice)
    trajectory.reward = result.reward

    return trajectory.finish()
62+
63+
64+
async def main() -> None:
    """Entry point: configure the backend and model, then run the RL loop."""
    load_dotenv()
    weave.init("openenv-demo")

    # Serverless training backend.
    backend = ServerlessBackend()

    # Trainable model; the timestamped name keeps separate runs distinct.
    model = art.TrainableModel(
        name=f"openenv-echo-{datetime.now().strftime('%Y-%m-%d-%H%M%S')}",
        project="openenv-demo",
        base_model="OpenPipe/Qwen3-14B-Instruct",
    )
    await model.register(backend)

    # One environment client per parallel rollout, reused across steps.
    env_pool = [
        EchoEnv.from_docker_image("quixote13/echo-env:latest")
        for _ in range(ROLLOUTS_PER_GROUP)
    ]

    # Resume from the model's current step so restarts continue where they left off.
    start_step = await model.get_step()
    for step in range(start_step, NUM_STEPS):
        print(f"Gathering groups for step {step}")

        # Launch one rollout per pooled client and gather them as one group.
        group = art.TrajectoryGroup(
            rollout(model, env_client) for env_client in env_pool
        )
        groups = await art.gather_trajectory_groups([group])

        # Update the model on the collected trajectories.
        await model.train(groups)


if __name__ == "__main__":
    asyncio.run(main())
103+
```

examples/openenv_echo.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# /// script
2+
# requires-python = ">=3.10"
3+
# dependencies = [
4+
# "openenv-core==0.1.13",
5+
# "openpipe-art==0.5.1",
6+
# ]
7+
#
8+
# ///
9+
import asyncio
10+
from datetime import datetime
11+
12+
import art
13+
from art.serverless.backend import ServerlessBackend
14+
from dotenv import load_dotenv
15+
from envs.echo_env import EchoAction, EchoEnv
16+
import weave
17+
18+
PROMPT = "Use at most 100 tokens; maximize the total character length of the output."
19+
NUM_STEPS = 50
20+
ROLLOUTS_PER_GROUP = 4
21+
22+
23+
# In ART, the rollout function defines how the agent interacts with its
# environment for one episode.
async def rollout(model: art.TrainableModel, env_client: EchoEnv) -> art.Trajectory:
    """Play one episode in the echo environment and return the finished trajectory."""
    # The echo environment has no internal state to reset, but we reset anyway
    # to demonstrate the standard OpenEnv pattern (reset is blocking, so it is
    # pushed onto a worker thread).
    await asyncio.to_thread(env_client.reset)

    # A Trajectory stores the messages exchanged plus the final reward.
    trajectory = art.Trajectory(
        messages_and_choices=[{"role": "system", "content": PROMPT}], reward=0.0
    )

    # Use the model we're training to generate the next action. For this
    # simple environment the action is a single message.
    completion = await model.openai_client().chat.completions.create(
        model=model.inference_model_name,
        messages=trajectory.messages(),
        max_completion_tokens=100,
        timeout=30,
    )
    choice = completion.choices[0]
    reply = (choice.message.content or "").strip()

    # Send the action to the environment (also blocking → worker thread).
    result = await asyncio.to_thread(env_client.step, EchoAction(message=reply))

    # Keep the sampled choice so the produced tokens can be trained on.
    trajectory.messages_and_choices.append(choice)

    # The environment returns a reward (here, message length divided by 10 —
    # per the upstream example's description); record it for training.
    trajectory.reward = result.reward

    # Hand the completed trajectory back to the trainer.
    return trajectory.finish()
55+
56+
57+
async def main() -> None:
    """Set up the backend, model, and environment pool, then run the training loop."""
    load_dotenv()

    weave.init("openenv-demo")

    # The ServerlessBackend requires a `WANDB_API_KEY` environment variable to
    # be set. You can also use the ART `LocalBackend` to train on a local GPU.
    backend = ServerlessBackend()

    # The model we'll train: a LoRA adapter on top of Qwen3-14B.
    model = art.TrainableModel(
        name=f"openenv-echo-{datetime.now().strftime('%Y-%m-%d-%H%M%S')}",
        project="openenv-demo",
        base_model="OpenPipe/Qwen3-14B-Instruct",
    )
    await model.register(backend)

    # A shared pool of environment clients, so we don't start up and tear down
    # a docker container for every rollout.
    env_pool = [
        EchoEnv.from_docker_image("quixote13/echo-env:latest")
        for _ in range(ROLLOUTS_PER_GROUP)
    ]

    # Train for a fixed number of steps, resuming from the model's current
    # step so an interrupted run picks up where it left off.
    for step in range(await model.get_step(), NUM_STEPS):
        print(f"Gathering groups for step {step}")

        # Run one rollout per pooled environment client in parallel and
        # collect them into a single trajectory group.
        groups = await art.gather_trajectory_groups(
            [art.TrajectoryGroup(rollout(model, env_client) for env_client in env_pool)]
        )

        await model.train(groups)


# Guard the entry point so importing this module doesn't start a training run
# (matches the documentation example's pattern).
if __name__ == "__main__":
    asyncio.run(main())

0 commit comments

Comments
 (0)