Add web session analysis platform with follow-up topics
This commit is contained in:
147
webapp/task_runner.py
Normal file
147
webapp/task_runner.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Background task runner for analysis jobs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
from utils.create_session_dir import create_session_output_dir
|
||||
from webapp.session_manager import SessionManager
|
||||
from webapp.storage import Storage, utcnow_iso
|
||||
|
||||
|
||||
class TaskRunner:
    """Runs analysis tasks in background worker threads.

    Tasks are handed to a ``ThreadPoolExecutor``. A set of in-flight task
    ids (guarded by ``self._lock``) prevents the same task from being queued
    twice while it is still pending or running.
    """

    def __init__(
        self,
        storage: "Storage",
        uploads_dir: str,
        outputs_dir: str,
        session_manager: "SessionManager",
        max_workers: int = 2,
    ):
        """Store collaborators and start the worker pool.

        Args:
            storage: Persistence layer used to read and update tasks,
                sessions and uploaded-file records.
            uploads_dir: Directory for uploads; stored as an absolute path.
            outputs_dir: Root directory under which per-session output
                directories are created; stored as an absolute path.
            session_manager: Provides the per-session runtime objects.
            max_workers: Size of the worker thread pool.
        """
        self.storage = storage
        self.uploads_dir = os.path.abspath(uploads_dir)
        self.outputs_dir = os.path.abspath(outputs_dir)
        self.session_manager = session_manager
        self._executor = ThreadPoolExecutor(max_workers=max_workers)
        self._lock = threading.Lock()
        # Ids of tasks that are queued or currently executing.
        self._submitted = set()

    def submit(self, task_id: str, user_id: str) -> None:
        """Queue ``task_id`` for background execution.

        A task id that is already queued or running is ignored. If the
        executor refuses the job (for example after it has been shut down),
        the id is removed from the in-flight set again and the executor's
        exception is re-raised, so the task can be resubmitted later.
        """
        with self._lock:
            if task_id in self._submitted:
                return
            self._submitted.add(task_id)
        try:
            self._executor.submit(self._run_task, task_id, user_id)
        except Exception:
            # Nothing will ever run for this id, so undo the bookkeeping;
            # otherwise the id would block every later submit() call.
            with self._lock:
                self._submitted.discard(task_id)
            raise

    def _run_task(self, task_id: str, user_id: str) -> None:
        """Execute one analysis task inside a worker thread.

        Loads the task and its session from storage, resolves the input
        files, makes sure the session has an output directory, runs the
        session's agent with stdout/stderr redirected into ``task.log`` and
        finally records the outcome in storage.

        Returns silently when the task or the session cannot be found. Any
        other exception marks the task as ``failed`` so that it never stays
        in a non-terminal state; the task id is always removed from the
        in-flight set.
        """
        session_id = None
        # True once this task has switched the session to "running"; only
        # then is it this task's job to switch it back to "open" on failure.
        marked_running = False
        try:
            task = self.storage.get_task(task_id, user_id)
            if not task:
                return
            session = self.storage.get_session(task["session_id"], user_id)
            if not session:
                return
            session_id = session["id"]

            uploaded_files = self.storage.list_uploaded_files(
                task["uploaded_file_ids"], user_id
            )
            data_files = [item["stored_path"] for item in uploaded_files]
            template_path = self._resolve_template_path(task, user_id)

            # Reuse the session's output directory when it already has one;
            # otherwise create it and remember it on the session.
            session_output_dir = session.get("session_output_dir")
            if not session_output_dir:
                session_output_dir = create_session_output_dir(
                    self.outputs_dir, session["title"]
                )
                self.storage.update_session(
                    session_id,
                    session_output_dir=session_output_dir,
                )

            runtime = self.session_manager.get_or_create(
                session_id=session_id,
                user_id=user_id,
                session_output_dir=session_output_dir,
                uploaded_files=data_files,
                template_path=template_path,
            )

            self.storage.update_task(
                task_id,
                status="running",
                session_output_dir=session_output_dir,
                started_at=utcnow_iso(),
                error_message=None,
            )
            self.storage.update_session(session_id, status="running")
            marked_running = True

            log_path = os.path.join(session_output_dir, "task.log")
            # The runtime's lock is held for the whole analysis and until
            # the report has been copied.
            with runtime.lock:
                with open(log_path, "a", encoding="utf-8") as log_file:
                    log_file.write(
                        f"[{utcnow_iso()}] task started for session {session_id}\n"
                    )
                    try:
                        # NOTE(review): redirect_stdout/redirect_stderr rebind
                        # the process-wide sys.stdout/sys.stderr, so with more
                        # than one worker the output of concurrently running
                        # tasks can end up in each other's log files.
                        with redirect_stdout(log_file), redirect_stderr(log_file):
                            result = runtime.agent.analyze(
                                user_input=task["query"],
                                files=data_files,
                                template_path=template_path,
                                session_output_dir=session_output_dir,
                                # Only the first run on this runtime resets
                                # the agent context.
                                reset_context=not runtime.initialized,
                                keep_session_open=True,
                            )
                        runtime.initialized = True
                    except Exception as exc:
                        self._record_failure(task_id, session_id, exc)
                        log_file.write(f"[{utcnow_iso()}] task failed: {exc}\n")
                        return

                report_file_path = self._persist_task_report(
                    task_id, session_output_dir, result.get("report_file_path")
                )

            self.storage.update_task(
                task_id,
                status="succeeded",
                report_file_path=report_file_path,
                finished_at=utcnow_iso(),
                error_message=None,
            )
            self.storage.update_session(session_id, status="open")
        except Exception as exc:
            # Anything that went wrong outside the agent call (storage,
            # directory creation, log file, report copy, ...). Without this
            # handler the exception would vanish into the executor's Future
            # and the task would never reach a terminal state.
            self._record_failure(
                task_id, session_id if marked_running else None, exc
            )
        finally:
            with self._lock:
                self._submitted.discard(task_id)

    def _record_failure(
        self, task_id: str, session_id: Optional[str], exc: BaseException
    ) -> None:
        """Mark ``task_id`` as failed and, if given, reopen the session."""
        self.storage.update_task(
            task_id,
            status="failed",
            error_message=str(exc),
            finished_at=utcnow_iso(),
            report_file_path=None,
        )
        if session_id is not None:
            self.storage.update_session(session_id, status="open")

    def _resolve_template_path(self, task: dict, user_id: str) -> Optional[str]:
        """Return the stored path of the task's template file, if any.

        Returns ``None`` when the task has no ``template_file_id`` or when
        storage has no matching uploaded-file record for this user.
        """
        template_file_id = task.get("template_file_id")
        if not template_file_id:
            return None
        file_record = self.storage.get_uploaded_file(template_file_id, user_id)
        return file_record["stored_path"] if file_record else None

    @staticmethod
    def _persist_task_report(
        task_id: str, session_output_dir: str, current_report_path: Optional[str]
    ) -> Optional[str]:
        """Copy the report to a task-specific file in the session directory.

        Args:
            task_id: Id used to build the target name ``report_<task_id>.md``.
            session_output_dir: Directory that receives the copy.
            current_report_path: Path of the report to copy; may be ``None``.

        Returns:
            The task-specific report path, or ``current_report_path``
            unchanged when it is empty or does not exist on disk.
        """
        if not current_report_path or not os.path.exists(current_report_path):
            return current_report_path
        task_report_path = os.path.join(session_output_dir, f"report_{task_id}.md")
        # Skip the copy when the report already is the task-specific file.
        if os.path.abspath(current_report_path) != os.path.abspath(task_report_path):
            shutil.copyfile(current_report_path, task_report_path)
        return task_report_path
|
||||
Reference in New Issue
Block a user