""" Task execution engine using ReAct pattern — fully AI-driven. """
import json
import re
import time
import logging
2026-03-07 00:04:29 +08:00
from typing import List , Dict , Any , Optional
from openai import OpenAI
from src . models . analysis_plan import AnalysisTask
from src . models . analysis_result import AnalysisResult
from src . tools . base import AnalysisTool
from src . data_access import DataAccessLayer
from src . config import get_config
logger = logging . getLogger ( __name__ )
def execute_task(
    task: AnalysisTask,
    tools: List[AnalysisTool],
    data_access: DataAccessLayer,
    max_iterations: int = 10
) -> AnalysisResult:
    """
    Execute an analysis task using the ReAct (Reason + Act) pattern.

    The LLM decides which tools to call and with what parameters; there are
    no hardcoded analysis heuristics. When no API key is configured, control
    is delegated to `_fallback_task_execution`.

    Args:
        task: The analysis task to execute.
        tools: Tools the model is allowed to invoke.
        data_access: Data access layer used to run tools against the data.
        max_iterations: Upper bound on thought/action cycles.

    Returns:
        AnalysisResult with collected observation data, visualization paths,
        insights and execution time; on any exception an AnalysisResult with
        success=False and the error message is returned instead of raising.
    """
    start_time = time.time()

    config = get_config()
    api_key = config.llm.api_key

    if not api_key:
        # No LLM available -- run the deterministic non-AI fallback.
        return _fallback_task_execution(task, tools, data_access)

    client = OpenAI(api_key=api_key, base_url=config.llm.base_url)

    history: List[Dict[str, Any]] = []
    visualizations: List[str] = []

    column_names = data_access.columns

    try:
        for _ in range(max_iterations):
            prompt = _build_thought_prompt(task, tools, history, column_names)
            response = client.chat.completions.create(
                model=config.llm.model,
                messages=[
                    {"role": "system", "content": _system_prompt()},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=1200
            )

            thought = _parse_thought_response(response.choices[0].message.content)
            history.append({"type": "thought", "content": thought})

            if thought.get('is_completed', False):
                break

            tool_name = thought.get('selected_tool')
            tool_params = thought.get('tool_params', {})

            if tool_name:
                tool = _find_tool(tools, tool_name)
                if tool:
                    action_result = call_tool(tool, data_access, **tool_params)
                    history.append({
                        "type": "action",
                        "tool": tool_name,
                        "params": tool_params
                    })
                    history.append({
                        "type": "observation",
                        "result": action_result
                    })
                    # Collect chart paths reported via either convention
                    # ('visualization_path' at top level, or 'chart_path'
                    # nested under 'data'). Guard against a non-dict 'data'
                    # payload and skip empty/duplicate paths so one result
                    # cannot add the same chart twice.
                    if isinstance(action_result, dict):
                        viz_path = action_result.get('visualization_path')
                        if viz_path:
                            visualizations.append(viz_path)
                        payload = action_result.get('data')
                        chart_path = payload.get('chart_path') if isinstance(payload, dict) else None
                        if chart_path and chart_path != viz_path:
                            visualizations.append(chart_path)
                else:
                    # Record the miss so the model can self-correct next turn.
                    history.append({
                        "type": "observation",
                        "result": {"error": f"Tool '{tool_name}' not found. Available: {[t.name for t in tools]}"}
                    })

        insights = extract_insights(history, client)
        execution_time = time.time() - start_time

        # Collect every successful observation payload, keyed in step order.
        all_data: Dict[str, Any] = {}
        for entry in history:
            if entry['type'] == 'observation':
                result = entry.get('result', {})
                if isinstance(result, dict) and result.get('success', True):
                    all_data[f"step_{len(all_data)}"] = result

        return AnalysisResult(
            task_id=task.id,
            task_name=task.name,
            success=True,
            data=all_data,
            visualizations=visualizations,
            insights=insights,
            execution_time=execution_time
        )

    except Exception as e:
        logger.error(f"Task execution failed: {e}")
        return AnalysisResult(
            task_id=task.id,
            task_name=task.name,
            success=False,
            error=str(e),
            execution_time=time.time() - start_time
        )
def _system_prompt ( ) - > str :
return (
" You are a data analyst executing analysis tasks by calling tools. "
" You can ONLY see column names and tool descriptions — never raw data rows. "
" You MUST call tools to get any data. Always respond with valid JSON. "
" Use actual column names. Pick the right tool and parameters for the task. "
)
def _build_thought_prompt (
task : AnalysisTask ,
tools : List [ AnalysisTool ] ,
2026-03-09 10:06:21 +08:00
history : List [ Dict [ str , Any ] ] ,
column_names : List [ str ] = None
2026-03-07 00:04:29 +08:00
) - > str :
2026-03-09 10:06:21 +08:00
""" Build prompt for the ReAct thought step. """
2026-03-07 00:04:29 +08:00
tool_descriptions = " \n " . join ( [
2026-03-09 10:06:21 +08:00
f " - { tool . name } : { tool . description } \n Parameters: { json . dumps ( tool . parameters . get ( ' properties ' , { } ) , ensure_ascii = False ) } "
2026-03-07 00:04:29 +08:00
for tool in tools
] )
2026-03-09 10:06:21 +08:00
columns_str = f " \n Available Data Columns: { ' , ' . join ( column_names ) } \n " if column_names else " "
history_str = " "
if history :
for h in history [ - 8 : ] :
if h [ ' type ' ] == ' thought ' :
content = h . get ( ' content ' , { } )
history_str + = f " \n Thought: { content . get ( ' reasoning ' , ' ' ) [ : 200 ] } "
elif h [ ' type ' ] == ' action ' :
history_str + = f " \n Action: { h . get ( ' tool ' , ' ' ) } ( { json . dumps ( h . get ( ' params ' , { } ) , ensure_ascii = False ) } ) "
elif h [ ' type ' ] == ' observation ' :
result = h . get ( ' result ' , { } )
result_str = json . dumps ( result , ensure_ascii = False , default = str ) [ : 500 ]
history_str + = f " \n Observation: { result_str } "
actions_taken = sum ( 1 for h in history if h [ ' type ' ] == ' action ' )
return f """ Task: { task . description }
Expected Output : { task . expected_output }
{ columns_str }
2026-03-07 00:04:29 +08:00
Available Tools :
{ tool_descriptions }
2026-03-09 10:06:21 +08:00
Execution History : { history_str if history_str else " (none yet — start by calling a tool) " }
2026-03-07 00:04:29 +08:00
2026-03-09 10:06:21 +08:00
Actions taken : { actions_taken }
2026-03-07 00:04:29 +08:00
2026-03-09 10:06:21 +08:00
Instructions :
1. Pick the most relevant tool and call it with correct column names .
2. After each observation , decide if you need more data or can conclude .
3. Aim for 2 - 4 tool calls total to gather enough data .
4. When you have enough data , set is_completed = true and summarize findings in reasoning .
Respond ONLY with this JSON ( no other text ) :
2026-03-07 00:04:29 +08:00
{ {
2026-03-09 10:06:21 +08:00
" reasoning " : " your analysis reasoning " ,
2026-03-07 00:04:29 +08:00
" is_completed " : false ,
" selected_tool " : " tool_name " ,
" tool_params " : { { " param " : " value " } }
} }
"""
def _parse_thought_response ( response_text : str ) - > Dict [ str , Any ] :
2026-03-09 10:06:21 +08:00
""" Parse AI thought response JSON. """
2026-03-07 00:04:29 +08:00
json_match = re . search ( r ' \ { .* \ } ' , response_text , re . DOTALL )
if json_match :
try :
return json . loads ( json_match . group ( ) )
except json . JSONDecodeError :
pass
return {
' reasoning ' : response_text ,
' is_completed ' : False ,
' selected_tool ' : None ,
' tool_params ' : { }
}
def call_tool(
    tool: AnalysisTool,
    data_access: DataAccessLayer,
    **kwargs
) -> Dict[str, Any]:
    """Run *tool* through the data access layer, wrapping the outcome.

    Returns ``{'success': True, 'data': ...}`` on success, or
    ``{'success': False, 'error': ...}`` when the tool raises — callers
    never see the exception itself.
    """
    try:
        outcome = data_access.execute_tool(tool, **kwargs)
    except Exception as exc:
        return {'success': False, 'error': str(exc)}
    return {'success': True, 'data': outcome}
def extract_insights(
    history: List[Dict[str, Any]],
    client: Optional[OpenAI] = None
) -> List[str]:
    """Summarize the execution history into key insight strings via the LLM.

    Falls back to rule-based extraction from the raw observations when no
    client is provided, or when the LLM call or its JSON parsing fails.
    """
    if not client:
        return _extract_insights_from_observations(history)

    config = get_config()
    serialized = json.dumps(history, indent=2, ensure_ascii=False, default=str)[:4000]

    try:
        response = client.chat.completions.create(
            model=config.llm.model,
            messages=[
                {"role": "system", "content": "You are a data analyst. Extract key insights from analysis results. Respond in Chinese. Return a JSON array of 3-5 insight strings with specific numbers."},
                {"role": "user", "content": f"Execution history:\n{serialized}\n\nExtract 3-5 key data-driven insights as a JSON array of strings."},
            ],
            temperature=0.5,
            max_tokens=800,
        )
        reply = response.choices[0].message.content
        found = re.search(r'\[.*\]', reply, re.DOTALL)
        if found:
            candidates = json.loads(found.group())
            # Only accept a non-empty list; anything else falls through.
            if isinstance(candidates, list) and len(candidates) > 0:
                return candidates
    except Exception as e:
        logger.warning(f"AI insight extraction failed: {e}")

    return _extract_insights_from_observations(history)
def _extract_insights_from_observations ( history : List [ Dict [ str , Any ] ] ) - > List [ str ] :
""" Fallback: extract insights directly from observation data. """
insights = [ ]
for entry in history :
if entry [ ' type ' ] != ' observation ' :
continue
result = entry . get ( ' result ' , { } )
if not isinstance ( result , dict ) :
continue
data = result . get ( ' data ' , result )
if not isinstance ( data , dict ) :
continue
if ' groups ' in data :
top = data [ ' groups ' ] [ : 3 ] if isinstance ( data [ ' groups ' ] , list ) else [ ]
if top :
group_str = ' , ' . join ( f " { g . get ( ' group ' , ' ? ' ) } : { g . get ( ' value ' , 0 ) } " for g in top )
insights . append ( f " Top groups: { group_str } " )
if ' distribution ' in data :
dist = data [ ' distribution ' ] [ : 3 ] if isinstance ( data [ ' distribution ' ] , list ) else [ ]
if dist :
dist_str = ' , ' . join ( f " { d . get ( ' value ' , ' ? ' ) } : { d . get ( ' percentage ' , 0 ) : .1f } % " for d in dist )
insights . append ( f " Distribution: { dist_str } " )
if ' trend ' in data :
insights . append ( f " Trend: { data [ ' trend ' ] } , growth rate: { data . get ( ' growth_rate ' , ' N/A ' ) } " )
if ' outlier_count ' in data :
insights . append ( f " Outliers: { data [ ' outlier_count ' ] } ( { data . get ( ' outlier_percentage ' , 0 ) : .1f } %) " )
if ' mean ' in data and ' column ' in data :
insights . append ( f " { data [ ' column ' ] } : mean= { data [ ' mean ' ] : .2f } , median= { data . get ( ' median ' , ' N/A ' ) } " )
return insights [ : 5 ] if insights else [ " Analysis completed " ]
def _find_tool ( tools : List [ AnalysisTool ] , tool_name : str ) - > Optional [ AnalysisTool ] :
""" Find tool by name. """
for tool in tools :
if tool . name == tool_name :
return tool
return None
def _fallback_task_execution(
    task: AnalysisTask,
    tools: List[AnalysisTool],
    data_access: DataAccessLayer
) -> AnalysisResult:
    """Fallback execution without AI — runs required tools with minimal params.

    Each tool named by the task (or the first three available, when the
    task names none) is invoked with guessed minimal parameters; successful
    results are collected. Any exception yields a failed AnalysisResult
    rather than propagating.
    """
    start_time = time.time()

    collected: Dict[str, Any] = {}
    insights: List[str] = []

    try:
        columns = data_access.columns
        # Prefer the task's declared tools; otherwise sample the first three.
        if task.required_tools:
            tool_names = task.required_tools
        else:
            tool_names = [t.name for t in tools[:3]]

        for name in tool_names:
            tool = _find_tool(tools, name)
            if tool is None:
                continue
            # Best effort: call with the first column as a basic parameter.
            params = _guess_minimal_params(tool, columns)
            if not params:
                continue
            outcome = call_tool(tool, data_access, **params)
            if outcome.get('success'):
                collected[name] = outcome.get('data', {})

        return AnalysisResult(
            task_id=task.id,
            task_name=task.name,
            success=True,
            data=collected,
            insights=insights or ["Fallback execution completed"],
            execution_time=time.time() - start_time
        )
    except Exception as e:
        return AnalysisResult(
            task_id=task.id,
            task_name=task.name,
            success=False,
            error=str(e),
            execution_time=time.time() - start_time
        )
def _guess_minimal_params ( tool : AnalysisTool , columns : List [ str ] ) - > Optional [ Dict [ str , Any ] ] :
""" Guess minimal params for fallback — just pick first applicable column. """
props = tool . parameters . get ( ' properties ' , { } )
required = tool . parameters . get ( ' required ' , [ ] )
params = { }
for param_name in required :
prop = props . get ( param_name , { } )
if prop . get ( ' type ' ) == ' string ' and ' column ' in param_name . lower ( ) :
params [ param_name ] = columns [ 0 ] if columns else ' '
elif prop . get ( ' type ' ) == ' string ' :
params [ param_name ] = columns [ 0 ] if columns else ' '
return params if params else None