Skip to content

Commit 953a6b1

Browse files
authored
Revisions (#25)
1 parent 4ebcbf3 commit 953a6b1

File tree

3 files changed

+25
-102
lines changed

3 files changed

+25
-102
lines changed

src/fhda/data_analysis_env.py

Lines changed: 2 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import hashlib
2-
import json
32
import logging
43
import shutil
54
from typing import Any, cast
@@ -10,11 +9,10 @@
109
Message,
1110
Messages,
1211
Tool,
13-
eval_answer,
1412
)
1513

1614
from .notebook_env import NBEnvironment
17-
from .utils import NBLanguage, MultipleChoiceQuestion, nb_to_html
15+
from .utils import NBLanguage, nb_to_html
1816
from . import prompts
1917
from . import config as cfg
2018

@@ -35,14 +33,12 @@ def __init__(
3533
correct_reward: float = 1.0,
3634
eval_mode: EvalAnswerMode,
3735
metadata: dict[str, Any] | None = None, # used for NBEvalExpt
38-
mcqs: list[MultipleChoiceQuestion] | None = None,
3936
**kwargs,
4037
):
4138
super().__init__(**kwargs)
4239

4340
self.problem_id = problem_id
4441
self.problem = problem
45-
self.mcqs = mcqs
4642
self.answer = answer
4743
self.eval_mode = eval_mode
4844
self.correct_reward = correct_reward
@@ -74,80 +70,13 @@ async def submit_answer(self, answer: str | float | dict[str, Any] | None) -> st
7470
Args:
7571
answer: The answer to the problem
7672
"""
77-
# TODO: support various eval modes
7873
self.state.answer = answer
7974
self.state.done = True
8075
logger.info("Submitting answer and closing environment")
8176
await self.close()
82-
correct = False
8377
logger.info("Answer: %s", answer)
8478

85-
if self.eval_mode is None:
86-
return CORRECT_MSG
87-
88-
if isinstance(self.answer, int):
89-
try:
90-
answer = int(answer) # type: ignore[arg-type]
91-
except ValueError:
92-
pass
93-
else:
94-
correct = answer == self.answer
95-
96-
elif isinstance(self.answer, float):
97-
try:
98-
answer = float(answer) # type: ignore[arg-type]
99-
except ValueError:
100-
pass
101-
else:
102-
correct = abs(answer - self.answer) < 1e-4 * self.answer
103-
104-
elif isinstance(self.answer, str):
105-
correct = bool(
106-
await eval_answer(
107-
proposed=str(answer),
108-
correct=str(self.answer),
109-
question=self.problem,
110-
eval_mode=self.eval_mode,
111-
)
112-
)
113-
elif isinstance(self.answer, dict): # This is for mcqs and open questions
114-
# Check if answer is a json string
115-
if isinstance(answer, str): # type: ignore[unreachable]
116-
# Process json into dictionary
117-
try:
118-
processed_answer = json.loads(answer)
119-
except json.JSONDecodeError:
120-
return INCORRECT_MSG
121-
else:
122-
processed_answer = answer if isinstance(answer, dict) else {}
123-
124-
# Loop through each question and answer
125-
for question_id, agent_answer in processed_answer.items():
126-
try:
127-
ideal_answer = self.answer[question_id]
128-
question = next(
129-
q
130-
for q in self.mcqs
131-
if q.question_id.lower() == question_id.lower()
132-
)
133-
correct = bool(
134-
await eval_answer(
135-
proposed=str(agent_answer),
136-
correct=str(ideal_answer),
137-
question=question,
138-
eval_mode=self.eval_mode,
139-
)
140-
)
141-
self.question_rewards[question_id] = correct
142-
except KeyError:
143-
self.question_rewards[question_id] = 0
144-
average_reward = sum(self.question_rewards.values()) / len(self.mcqs)
145-
correct = round(average_reward) == 1.0
146-
147-
if correct:
148-
self.state.total_reward += self.correct_reward
149-
return CORRECT_MSG
150-
return INCORRECT_MSG
79+
return f"Submitted answer: {answer}"
15180

15281
@classmethod
15382
def from_task(

src/fhda/notebook_env.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ async def close(self):
113113

114114
class NBEnvironment(Environment[NBEnvironmentState]):
115115
NOTEBOOK_NAME: ClassVar[str] = "notebook.ipynb"
116-
EXEC_TIMEOUT: ClassVar[float | None] = 300.0
116+
EXEC_TIMEOUT: ClassVar[float | None] = 1200.0
117117

118118
state: NBEnvironmentState
119119

src/fhda/prompts.py

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,20 @@
77

88
CAPSULE_SYSTEM_PROMPT_MCQ = """
99
You are an expert bioinformatician and seasoned biological data scientist.
10-
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a series of Multiple Choice Questions (MCQs).
11-
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions, structured in a way that another model could use to derive the answers.
10+
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a Multiple Choice Question (MCQ).
11+
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question, structured in a way that another model could use to derive the answer.
1212
"""
1313

1414
CAPSULE_SYSTEM_PROMPT_OPEN = """
1515
You are an expert bioinformatician and seasoned biological data scientist.
16-
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a series of open-ended questions.
17-
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions, structured in a way that another model could use to derive the answers.
16+
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer an open-ended question.
17+
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question, structured in a way that another model could use to derive the answer.
1818
"""
1919

2020
CAPSULE_SYSTEM_PROMPT_QUERY = """
2121
You are an expert bioinformatician and seasoned biological data scientist.
2222
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a user query.
23-
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions.
23+
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question.
2424
Take your time to think through the question and the data before writing any code, explore the data rigorously and defend your conclusions rigorously.
2525
"""
2626

@@ -168,35 +168,29 @@
168168
"""
169169
SUBMIT_ANSWER_SINGLE = """
170170
[Use the submit_answer tool to submit your final answer as a single string]
171+
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
171172
Example output:
172173
```
173-
submit_answer("CD94") or submit_answer("-1.23")
174+
submit_answer("<answer>CD94</answer>") or submit_answer("<answer>-1.23</answer>")
174175
```
175176
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
176177
"""
177178
SUBMIT_ANSWER_OPEN = """
178-
[Use the submit_answer tool to submit your final answer as a jsondictionary with keys as the question number and values as a short answer]
179+
[Use the submit_answer tool to submit your final answer as a single string with your short answer]
180+
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
179181
Example output:
180182
```
181-
submit_answer({{
182-
"q1": "Short answer to question 1",
183-
"q2": "Short answer to question 2",
184-
"q3": "Short answer to question 3",
185-
"q4": "Short answer to question 4"
186-
}})
183+
submit_answer("<answer>Your concise answer to the question</answer>")
187184
```
188185
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
189186
"""
190187
SUBMIT_ANSWER_MCQ = """
191-
[Use the submit_answer tool to submit your final answer as a json dictionary with keys as the question number and values as the answer]
188+
[Use the submit_answer tool to submit your final answer as a single string with the letter choice]
189+
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
192190
Example output:
193191
```
194-
submit_answer({{
195-
"q1": "A",
196-
"q2": "B",
197-
"q3": "C",
198-
"q4": "D"
199-
}})
192+
submit_answer("<answer>A</answer>") or submit_answer("<answer>B</answer>") or submit_answer("<answer>C</answer>") or submit_answer("<answer>D</answer>")
193+
```
200194
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
201195
"""
202196

@@ -215,10 +209,10 @@
215209
"""
216210
# MCQ
217211
MCQ_PROMPT_TEMPLATE = f"""
218-
Here are the questions you need to address:
219-
<questions>
220-
{{questions}}
221-
</questions>
212+
Here is the question you need to address:
213+
<question>
214+
{{question}}
215+
</question>
222216
223217
{CHAIN_OF_THOUGHT_AGNOSTIC}
224218
{SUBMIT_ANSWER_MCQ}
@@ -227,11 +221,11 @@
227221
"""
228222
# Open answer
229223
OPEN_PROMPT_TEMPLATE = f"""
230-
Here are the questions you need to address:
224+
Here is the question you need to address:
231225
232-
<questions>
233-
{{questions}}
234-
</questions>
226+
<question>
227+
{{question}}
228+
</question>
235229
236230
{CHAIN_OF_THOUGHT_AGNOSTIC}
237231
{SUBMIT_ANSWER_OPEN}

0 commit comments

Comments
 (0)