feat: add user interaction in data science scenario (#1251)

peteryang1 · XianBW · web-flow · commit 2afef703ca0e · 2025-09-18T11:10:12.000+08:00
* feat: add interactor classes and user interaction handling for experiments

* update code

* use fragment retry mechanism instead of rerun()

* fix a bug

* integrate user instructions into proposal and coder

* fix CI

* fix CI

* feat: add approval option for user instructions submission

* feat: enhance user instructions handling in Task and DSExperiment classes

* fix CI

* add user instructions into hypothesis rewrite

* add interface to command line

---------

Co-authored-by: Bowen Xian <xianbowen@outlook.com>
diff --git a/rdagent/app/cli.py b/rdagent/app/cli.py
@@ -61,6 +61,14 @@ def server_ui(port=19899):
     subprocess.run(["python", "rdagent/log/server/app.py", f"--port={port}"])
 
 
+def ds_user_interact(port=19900):
+    """
+    Start the Streamlit web app for data-science user interaction.
+
+    Runs ``rdagent/log/ui/ds_user_interact.py`` through ``streamlit run`` and
+    blocks until that process exits.
+
+    :param port: value passed to Streamlit as ``--server.port``.
+    """
+    commands = ["streamlit", "run", "rdagent/log/ui/ds_user_interact.py", f"--server.port={port}"]
+    subprocess.run(commands)
+
+
 app.command(name="fin_factor")(fin_factor)
 app.command(name="fin_model")(fin_model)
 app.command(name="fin_quant")(fin_quant)
@@ -72,6 +80,7 @@ def server_ui(port=19899):
 app.command(name="server_ui")(server_ui)
 app.command(name="health_check")(health_check)
 app.command(name="collect_info")(collect_info)
+app.command(name="ds_user_interact")(ds_user_interact)
 
 
 if __name__ == "__main__":
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Literal
 
 from pydantic_settings import SettingsConfigDict
@@ -20,6 +21,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
 
     planner: str = "rdagent.scenarios.data_science.proposal.exp_gen.planner.DSExpPlannerHandCraft"
     hypothesis_gen: str = "rdagent.scenarios.data_science.proposal.exp_gen.router.ParallelMultiTraceExpGen"
+    interactor: str = "rdagent.components.interactor.SkipInteractor"
     trace_scheduler: str = "rdagent.scenarios.data_science.proposal.exp_gen.trace_scheduler.RoundRobinScheduler"
     """Hypothesis generation class"""
 
@@ -182,6 +184,9 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
 
     ensemble_time_upper_bound: bool = False
 
+    user_interaction_wait_seconds: int = 6000  # seconds to wait for user interaction
+    user_interaction_mid_folder: Path = Path.cwd() / "git_ignore_folder" / "RD-Agent_user_interaction"
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
 
diff --git a/rdagent/components/coder/data_science/pipeline/prompts.yaml b/rdagent/components/coder/data_science/pipeline/prompts.yaml
@@ -169,7 +169,8 @@ pipeline_coder:
     8. **Try-except blocks are ONLY allowed when reading files. If no files are successfully read, it indicates incorrect file paths or reading methods, not a try-except issue. Try-except is PROHIBITED elsewhere in the code. Assert statements are PROHIBITED throughout the entire code.**
     9. ATTENTION: ALWAYS use the best saved model (not necessarily final epoch) for predictions. **NEVER create dummy/placeholder submissions (e.g., all 1s, random values)**. If training fails, report failure honestly rather than generating fake submission files.
     10. You should ALWAYS generate the complete code rather than partial code.
-    11. Strictly follow all specifications and general guidelines described above.
+    11. If the task contains any user instructions, you must strictly follow them. User instructions have the highest priority and should be followed even if they conflict with other specifications or guidelines.
+    12. Strictly follow all specifications and general guidelines described above.
 
     ### Output Format
     {% if out_spec %}
diff --git a/rdagent/components/interactor/__init__.py b/rdagent/components/interactor/__init__.py
@@ -0,0 +1,17 @@
+from rdagent.core.experiment import ASpecificExp
+from rdagent.core.interactor import Interactor
+from rdagent.core.proposal import Trace
+
+
+class SkipInteractor(Interactor[ASpecificExp]):
+    """Interactor that performs no interaction and passes experiments through."""
+
+    def interact(self, exp: ASpecificExp, trace: "Trace | None" = None) -> ASpecificExp:
+        """
+        Return the experiment unchanged.
+
+        Nothing is presented to the user and no input is collected; this
+        implementation simply skips the interaction step.
+
+        Parameters
+        ----------
+        exp:
+            The experiment that would otherwise be shown to the user.
+        trace:
+            Unused. Optional so the signature matches ``Interactor.interact``.
+
+        Returns
+        -------
+        The same ``exp`` object that was passed in.
+        """
+        return exp
diff --git a/rdagent/components/workflow/conf.py b/rdagent/components/workflow/conf.py
@@ -11,6 +11,7 @@ class BasePropSetting(ExtendedBaseSettings):
     knowledge_base: str = ""
     knowledge_base_path: str = ""
     hypothesis_gen: str = ""
+    interactor: str = ""
     hypothesis2experiment: str = ""
     coder: str = ""
     runner: str = ""
diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
@@ -13,7 +13,7 @@
 from copy import deepcopy
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, TypeVar
+from typing import TYPE_CHECKING, Any, Generic, List, TypeVar
 
 from rdagent.core.conf import RD_AGENT_SETTINGS
 from rdagent.core.evaluation import Feedback
@@ -48,13 +48,28 @@ def get_task_information(self) -> str:
         """
 
 
+class UserInstructions(List[str]):
+    """A list of user instruction strings that renders as a prompt section."""
+
+    def __str__(self) -> str:
+        """
+        Render the instructions as a bulleted block.
+
+        Returns an empty string when the list is empty; otherwise a header
+        line followed by one ``- <instruction>`` line per entry, with a
+        leading newline so it can be appended directly to other text.
+        """
+        if not self:
+            return ""
+        return "\nUser Instructions (Top priority!):\n" + "\n".join(f"- {ui}" for ui in self)
+
+
 class Task(AbsTask):
-    def __init__(self, name: str, version: int = 1, description: str = "") -> None:
+    def __init__(
+        self,
+        name: str,
+        version: int = 1,
+        description: str = "",
+        user_instructions: UserInstructions | None = None,
+    ) -> None:
         super().__init__(name, version)
         self.description = description
+        self.user_instructions = user_instructions
 
     def get_task_information(self) -> str:
-        return f"Task Name: {self.name}\nDescription: {self.description}"
+        """
+        Return the task name and description, followed by the user
+        instructions when any are set.
+        """
+        # `user_instructions` defaults to None; formatting it directly would
+        # append the literal text "None" to the description. getattr() also
+        # tolerates instances that lack the attribute altogether (presumably
+        # tasks unpickled from runs that predate it - confirm).
+        user_instructions = getattr(self, "user_instructions", None)
+        instructions_text = str(user_instructions) if user_instructions else ""
+        return f"Task Name: {self.name}\nDescription: {self.description}{instructions_text}"
 
     def __repr__(self) -> str:
         return f"<{self.__class__.__name__} {self.name}>"
@@ -410,6 +425,21 @@ def __init__(
         self.plan: ExperimentPlan | None = (
             None  # To store the planning information for this experiment, should be generated inside exp_gen.gen
         )
+        self.user_instructions: UserInstructions | None = None  # To store the user instructions for this experiment
+
+    def set_user_instructions(self, user_instructions: UserInstructions | None) -> None:
+        """
+        Attach user instructions to this experiment and to every task it holds.
+
+        ``None`` is a no-op, so existing instructions are never cleared by this
+        call. A plain ``list`` is wrapped in ``UserInstructions``. The resulting
+        value is stored on the experiment, on the target task of each
+        sub-workspace, on each sub-task, and on the target task of the
+        experiment workspace.
+        """
+        if user_instructions is None:
+            return
+        if not isinstance(user_instructions, UserInstructions) and isinstance(user_instructions, list):
+            user_instructions = UserInstructions(user_instructions)
+        self.user_instructions = user_instructions
+        for ws in self.sub_workspace_list:
+            # Skip empty slots and workspaces without a target task, mirroring
+            # the check applied to the experiment workspace below.
+            if ws is not None and ws.target_task is not None:
+                ws.target_task.user_instructions = user_instructions
+        for task in self.sub_tasks:
+            task.user_instructions = user_instructions
+        if self.experiment_workspace is not None and self.experiment_workspace.target_task is not None:
+            self.experiment_workspace.target_task.user_instructions = user_instructions
 
     @property
     def result(self) -> object:
diff --git a/rdagent/core/interactor.py b/rdagent/core/interactor.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from trace import Trace
+from typing import TYPE_CHECKING, Generic
+
+from rdagent.core.experiment import ASpecificExp
+
+if TYPE_CHECKING:
+    from rdagent.core.scenario import Scenario
+
+
+class Interactor(ABC, Generic[ASpecificExp]):
+    """Abstract base for components that may review or rewrite an experiment."""
+
+    def __init__(self, scen: Scenario) -> None:
+        # Keep the scenario so subclasses can refer to it while interacting.
+        self.scen: Scenario = scen
+
+    # NOTE(review): `Trace` in this module is imported from the standard-library
+    # `trace` module; presumably the project's own Trace class
+    # (rdagent.core.proposal) was intended - confirm.
+    @abstractmethod
+    def interact(self, exp: ASpecificExp, trace: Trace | None = None) -> ASpecificExp:
+        """
+        Interact with the experiment to get feedback or confirmation.
+
+        Responsibilities:
+        - Present the current state of the experiment.
+        - Collect input to guide the next steps in the experiment.
+        - Rewrite the experiment based on feedback.
+
+        Parameters
+        ----------
+        exp:
+            The experiment to review.
+        trace:
+            Optional trace passed along as context; may be None.
+
+        Returns
+        -------
+        An experiment of the same type as ``exp``.
+        """
diff --git a/rdagent/log/ui/ds_user_interact.py b/rdagent/log/ui/ds_user_interact.py
@@ -0,0 +1,172 @@
+import json
+import pickle
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import streamlit as st
+from streamlit import session_state as state
+
+from rdagent.app.data_science.conf import DS_RD_SETTING
+
+st.set_page_config(layout="wide", page_title="RD-Agent_user_interact", page_icon="🎓", initial_sidebar_state="expanded")
+
+# Initialise the session state.
+# `sessions` maps a session name to its loaded session data;
+# `selected_session_name` is the session currently shown (None = nothing selected).
+if "sessions" not in state:
+    state.sessions = {}
+if "selected_session_name" not in state:
+    state.selected_session_name = None
+
+
+def render_main_content():
+    """
+    Render the main content area for the currently selected session.
+
+    Shows the session context (scenario, former attempts, current code and
+    hypothesis candidates) and a form for the user's decision. On submit the
+    decision is written to ``<session>_RET.json`` in the user-interaction
+    folder and the session's ``.pkl`` file is removed. A separate button
+    pushes the session's expiration back by 60 seconds.
+
+    If no session is selected, or the selection is no longer in
+    ``state.sessions``, only a warning is shown.
+    """
+    session_name = state.selected_session_name
+    if session_name is None or session_name not in state.sessions:
+        st.warning("Please select a session from the sidebar.")
+        return
+
+    selected_session_data = state.sessions[session_name]
+    if selected_session_data is None:
+        return
+
+    interaction_folder = Path(DS_RD_SETTING.user_interaction_mid_folder)
+    session_pkl = interaction_folder / f"{session_name}.pkl"
+
+    st.title(f"Session: {session_name[:4]} with competition {selected_session_data['competition']}")
+    st.title("Contextual Information:")
+    st.subheader("Competition scenario:", divider=True)
+    st.code(selected_session_data["scenario_description"], language="yaml")
+    st.subheader("Former attempts summary:", divider=True)
+    st.code(selected_session_data["ds_trace_desc"], language="yaml")
+    if selected_session_data["current_code"] != "":
+        st.subheader("Current SOTA code", divider=True)
+        st.code(
+            body=selected_session_data["current_code"],
+            language="python",
+        )
+
+    st.subheader("Hypothesis candidates:", divider=True)
+    hypothesis_candidates = selected_session_data["hypothesis_candidates"]
+    target_index = selected_session_data["target_hypothesis_index"]
+    # An index of -1 marks every candidate as picked.
+    tabs = st.tabs(
+        [
+            f"{'✅' if i == target_index or target_index == -1 else ''}Hypothesis {i+1}"
+            for i in range(len(hypothesis_candidates))
+        ]
+    )
+    for tab, hypothesis in zip(tabs, hypothesis_candidates):
+        with tab:
+            st.code(str(hypothesis), language="yaml")
+    st.text("✅ means picked as target hypothesis")
+
+    st.title("Decisions to make:")
+
+    with st.form(key="user_form"):
+        st.caption("Please modify the fields below and submit to provide your feedback.")
+        original_hypothesis = selected_session_data["target_hypothesis"].hypothesis
+        target_hypothesis = st.text_area(
+            "Target hypothesis: (you can copy from candidates)",
+            value=original_hypothesis,
+            height="content",
+        )
+        original_task_desc = selected_session_data["task"].description
+        target_task = st.text_area(
+            "Target task description:",
+            value=original_task_desc,
+            height="content",
+        )
+        original_user_instruction = selected_session_data.get("user_instruction")
+        user_instruction_list = []
+        former_user_instructions = selected_session_data.get("former_user_instructions")
+        if former_user_instructions is not None:
+            st.caption("Former user instructions, you can modify or delete the content to remove certain instruction.")
+            for idx, user_instruction in enumerate(former_user_instructions):
+                user_instruction_list.append(
+                    st.text_area(
+                        "Former user instruction",
+                        value=user_instruction,
+                        height="content",
+                        # All of these widgets share one label; an explicit key
+                        # keeps two instructions with identical text from
+                        # colliding on the same auto-generated widget id.
+                        key=f"former_user_instruction_{session_name}_{idx}",
+                    )
+                )
+        user_instruction_list.append(st.text_area("Add new user instruction", value="", height="content"))
+        submit = st.form_submit_button("Submit")
+        approve = st.form_submit_button("Approve without changes")
+
+        if submit or approve:
+            if approve:
+                submit_dict = {
+                    "action": "confirm",
+                }
+            else:
+                # Blank text areas mean "no instruction"; an empty result is
+                # written as None rather than as an empty list.
+                user_instruction_str_list = [ui for ui in user_instruction_list if ui.strip() != ""] or None
+                unchanged = (
+                    target_hypothesis == original_hypothesis
+                    and target_task == original_task_desc
+                    and user_instruction_str_list == original_user_instruction
+                )
+                submit_dict = {
+                    "target_hypothesis": target_hypothesis,
+                    "task_description": target_task,
+                    "user_instruction": user_instruction_str_list,
+                    "action": "confirm" if unchanged else "rewrite",
+                }
+            # Context managers close (and flush) the file handles promptly
+            # instead of leaving that to garbage collection.
+            with open(interaction_folder / f"{session_name}_RET.json", "w", encoding="utf-8") as ret_file:
+                json.dump(submit_dict, ret_file)
+            session_pkl.unlink(missing_ok=True)
+            st.success("Your feedback has been submitted. Thank you!")
+            # Presumably keeps the success message on screen before the
+            # selection is cleared - confirm.
+            time.sleep(5)
+            state.selected_session_name = None
+
+    if st.button("Extend expiration by 60s"):
+        # NOTE(security): pickle.load executes arbitrary code from the file;
+        # the interaction folder must only ever contain trusted files.
+        with open(session_pkl, "rb") as pkl_file:
+            session_data = pickle.load(pkl_file)
+        session_data["expired_datetime"] = session_data["expired_datetime"] + timedelta(seconds=60)
+        with open(session_pkl, "wb") as pkl_file:
+            pickle.dump(session_data, pkl_file)
+
+
+# Refresh the sessions once per second.
+@st.fragment(run_every=1)
+def update_sessions():
+    """
+    Reload all session files from the user-interaction folder, then render
+    the main content.
+
+    Every ``*.pkl`` file whose ``expired_datetime`` is still in the future is
+    stored in ``state.sessions`` under its file stem. Expired session files
+    are deleted together with their ``<stem>_RET.json`` file.
+    """
+    log_folder = Path(DS_RD_SETTING.user_interaction_mid_folder)
+    state.sessions = {}
+    for session_file in log_folder.glob("*.pkl"):
+        try:
+            # NOTE(security): pickle.load executes arbitrary code from the
+            # file; the folder must only ever contain trusted files.
+            with open(session_file, "rb") as pkl_file:
+                session_data = pickle.load(pkl_file)
+            if session_data["expired_datetime"] > datetime.now():
+                state.sessions[session_file.stem] = session_data
+            else:
+                session_file.unlink(missing_ok=True)
+                (log_folder / f"{session_file.stem}_RET.json").unlink(missing_ok=True)
+        except Exception:
+            # Best-effort: a file that cannot be read or parsed is skipped and
+            # retried on the next refresh.
+            continue
+    render_main_content()
+
+
+@st.fragment(run_every=1)
+def render_sidebar():
+    """
+    Render the session list, re-running once per second.
+
+    Each session in ``state.sessions`` gets a bordered container holding a
+    select button and its remaining time. Clicking the button stores the
+    session name in ``state.selected_session_name`` and its data in
+    ``state.data``. A warning is shown when there are no sessions.
+    """
+    st.title("R&D-Agent User Interaction Portal")
+    if not state.sessions:
+        st.warning("No active sessions available. Please wait.")
+        return
+
+    st.header("Active Sessions")
+    st.caption("Click a session to view:")
+    # Iterate over a snapshot of the names rather than the live mapping.
+    for session_name in list(state.sessions):
+        with st.container(border=True):
+            time_left = state.sessions[session_name]["expired_datetime"] - datetime.now()
+            seconds_left = int(time_left.total_seconds())
+            if seconds_left > 0:
+                label = f"{seconds_left}s to expire"
+            else:
+                label = "Expired"
+            clicked = st.button(f"session id:{session_name[:4]}", key=f"session_btn_{session_name}")
+            if clicked:
+                state.selected_session_name = session_name
+                state.data = state.sessions[session_name]
+            st.markdown(f"⏳ {label}")
+
+
+# Page body: reload the sessions, which also renders the main content.
+update_sessions()
+# Sidebar: list the sessions so that one can be selected.
+with st.sidebar:
+    render_sidebar()
diff --git a/rdagent/scenarios/data_science/experiment/experiment.py b/rdagent/scenarios/data_science/experiment/experiment.py
@@ -3,13 +3,13 @@
 
 import pandas as pd
 
-from rdagent.core.experiment import Experiment, FBWorkspace, Task
+from rdagent.core.experiment import Experiment, FBWorkspace, Task, UserInstructions
 
 COMPONENT = Literal["DataLoadSpec", "FeatureEng", "Model", "Ensemble", "Workflow", "Pipeline"]
 
 
 class DSExperiment(Experiment[Task, FBWorkspace, FBWorkspace]):
-    def __init__(self, pending_tasks_list: list, *args, **kwargs) -> None:
+    def __init__(self, pending_tasks_list: list, hypothesis_candidates: list | None = None, *args, **kwargs) -> None:
         super().__init__(sub_tasks=[], *args, **kwargs)
         # Status
         # - Initial: blank;
@@ -18,11 +18,20 @@ def __init__(self, pending_tasks_list: list, *args, **kwargs) -> None:
         # the initial workspace or the successful new version after coding
         self.experiment_workspace = FBWorkspace()
         self.pending_tasks_list = pending_tasks_list
+        self.hypothesis_candidates = hypothesis_candidates
 
         self.format_check_result = None
         # this field is optional. It  is not none only when we have a format checker. Currently, only following cases are supported.
         # - mle-bench
 
+    def set_user_instructions(self, user_instructions: UserInstructions | None) -> None:
+        """
+        Attach user instructions to this experiment, including pending tasks.
+
+        Delegates to the base implementation first, then copies the stored
+        instructions onto every task in ``pending_tasks_list`` (a list of task
+        lists). ``None`` is a no-op.
+        """
+        super().set_user_instructions(user_instructions)
+        if user_instructions is None:
+            return
+        # Use the value the base class stored, not the raw argument: the base
+        # class wraps a plain list in UserInstructions, and pending tasks must
+        # receive that same object so they render it identically.
+        stored_instructions = self.user_instructions
+        for task_list in self.pending_tasks_list:
+            for task in task_list:
+                task.user_instructions = stored_instructions
+
     def is_ready_to_run(self) -> bool:
         """
         ready to run does not indicate the experiment is runnable
diff --git a/rdagent/scenarios/data_science/interactor/__init__.py b/rdagent/scenarios/data_science/interactor/__init__.py
diff --git a/rdagent/scenarios/data_science/loop.py b/rdagent/scenarios/data_science/loop.py
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py b/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py