Add web session analysis platform with follow-up topics

This commit is contained in:
2026-03-09 22:23:00 +08:00
commit 17ce711e49
30 changed files with 10681 additions and 0 deletions

4
webapp/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
"""
Web application package for the data analysis platform.
"""

242
webapp/api.py Normal file
View File

@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
"""
FastAPI application for the data analysis platform.
"""
import os
import re
import uuid
from typing import List, Optional
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from webapp.session_manager import SessionManager
from webapp.storage import Storage, utcnow_iso
from webapp.task_runner import TaskRunner
# Project root: the parent directory of the directory containing this module.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# Uploads and the SQLite database live under <root>/runtime.
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
# Analysis outputs are written to <root>/outputs, outside RUNTIME_DIR.
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")
# Create the working directories at import time (creating UPLOADS_DIR also
# creates RUNTIME_DIR, its parent).
os.makedirs(UPLOADS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)
# Module-level singletons shared by every request handler in this module.
storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)
task_runner = TaskRunner(
    storage=storage,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    session_manager=session_manager,
    max_workers=2,
)
app = FastAPI(title="Data Analysis Platform API")
# Front-end assets are served from the "static" directory next to this module.
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
class CreateSessionRequest(BaseModel):
    """JSON body accepted by ``POST /sessions``."""

    # Identifier of the requesting user; must be a non-empty string.
    user_id: str = Field(..., min_length=1)
    # Title stored on the new session; must be non-empty.
    title: str = Field(..., min_length=1)
    # Query for the first task created together with the session.
    query: str = Field(..., min_length=1)
    # Ids of previously uploaded files to attach to the session.
    file_ids: List[str]
    # Optional template file id, passed through to the session and task records.
    template_file_id: Optional[str] = None
class CreateTopicRequest(BaseModel):
    """JSON body accepted by ``POST /sessions/{session_id}/topics``."""

    # Identifier of the requesting user; must be a non-empty string.
    user_id: str = Field(..., min_length=1)
    # Query for the follow-up task; must be non-empty.
    query: str = Field(..., min_length=1)
def sanitize_filename(filename: str) -> str:
    """Return a filesystem-safe, ASCII-only version of *filename*.

    Every run of characters outside ``[A-Za-z0-9._-]`` (including path
    separators) collapses to a single ``_``, and leading/trailing ``.`` and
    ``_`` are stripped. When nothing usable remains the result is
    ``"upload.bin"``.

    If the stem sanitises to nothing but the extension is usable (for example
    a fully non-ASCII name such as ``"数据.csv"``), the extension is kept and
    the stem becomes ``"upload"``. Plain stripping would otherwise eat the dot
    and return ``"csv"``, losing the file type.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", filename)
    # Only junk ("." / "_") followed by one or more clean extensions.
    extension_only = re.fullmatch(r"[._]+((?:\.[A-Za-z0-9]+)+)[._]*", cleaned)
    if extension_only:
        return f"upload{extension_only.group(1)}"
    return cleaned.strip("._") or "upload.bin"
def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Return the session record for this id and user, or raise HTTP 404.

    The lookup is keyed on both ids, so a session belonging to another user
    is reported as missing.
    """
    record = storage.get_session(session_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Session not found")
def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Return the task record for this id and user, or raise HTTP 404.

    The lookup is keyed on both ids, so a task belonging to another user is
    reported as missing.
    """
    record = storage.get_task(task_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Task not found")
@app.get("/health")
def health():
    """Liveness probe: always answers with a fixed OK payload."""
    payload = {"status": "ok"}
    return payload
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the single-page front end (``static/index.html``)."""
    html_file = os.path.join(STATIC_DIR, "index.html")
    with open(html_file, encoding="utf-8") as handle:
        markup = handle.read()
    return HTMLResponse(markup)
@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Store the uploaded files under the user's upload directory.

    Each file is written in 1 MiB chunks to
    ``<UPLOADS_DIR>/<user_id>/<uuid>_<sanitized name>`` and registered in
    storage. Returns ``{"files": [...]}`` with the created records.

    Raises:
        HTTPException: 400 when ``user_id`` would resolve to a directory
            outside (or equal to) the uploads root.
    """
    user_dir = os.path.join(UPLOADS_DIR, user_id)
    # user_id comes straight from the client: refuse values such as "../x" or
    # an absolute path that would make us create directories and write files
    # outside the uploads root.
    uploads_root = os.path.realpath(UPLOADS_DIR)
    if not os.path.realpath(user_dir).startswith(uploads_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid user_id")
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The uuid prefix keeps repeated uploads of the same name apart.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        with open(stored_path, "wb") as f:
            while True:
                chunk = await upload.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
        # The original (unsanitized) name is kept for display purposes.
        saved.append(
            storage.create_uploaded_file(user_id, upload.filename or safe_name, stored_path)
        )
    return {"files": saved}
@app.get("/files")
def list_files(user_id: str = Query(...)):
    """Return every uploaded-file record registered for ``user_id``."""
    uploaded = storage.list_all_uploaded_files(user_id)
    return {"files": uploaded}
@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create an analysis session plus its first task and queue the task.

    Only file ids that resolve to files owned by ``request.user_id`` are
    persisted on the session and task; unknown or foreign ids are dropped
    (request order is preserved).

    Raises:
        HTTPException: 400 when none of the requested ids resolves.
    """
    owned_files = storage.list_uploaded_files(request.file_ids, request.user_id)
    if not owned_files:
        raise HTTPException(status_code=400, detail="No valid files found for session")
    # Previously every requested id was stored once at least one resolved, so
    # sessions could reference files the user does not own.
    owned_ids = {record["id"] for record in owned_files}
    file_ids = [file_id for file_id in request.file_ids if file_id in owned_ids]

    session = storage.create_session(
        user_id=request.user_id,
        title=request.title,
        uploaded_file_ids=file_ids,
        template_file_id=request.template_file_id,
    )
    task = storage.create_task(
        session_id=session["id"],
        user_id=request.user_id,
        query=request.query,
        uploaded_file_ids=file_ids,
        template_file_id=request.template_file_id,
    )
    task_runner.submit(task["id"], request.user_id)
    return {"session": session, "task": task}
@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """Return all sessions stored for ``user_id``."""
    owned_sessions = storage.list_sessions(user_id)
    return {"sessions": owned_sessions}
@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks; 404 if not accessible."""
    owned = ensure_session_access(session_id, user_id)
    return {
        "session": owned,
        "tasks": storage.list_session_tasks(session_id, user_id),
    }
@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Queue a follow-up task on an existing session.

    The task reuses the session's uploaded files and template. Responds with
    404 when the session is not accessible and 400 when it is closed.
    """
    owner = request.user_id
    session = ensure_session_access(session_id, owner)
    is_closed = session["status"] == "closed"
    if is_closed:
        raise HTTPException(status_code=400, detail="Session is closed")

    task_fields = {
        "session_id": session_id,
        "user_id": owner,
        "query": request.query,
        "uploaded_file_ids": session["uploaded_file_ids"],
        "template_file_id": session.get("template_file_id"),
    }
    task = storage.create_task(**task_fields)
    task_runner.submit(task["id"], owner)
    return {"session": session, "task": task}
@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session closed and release its in-memory runtime.

    The call is idempotent: closing an already-closed session leaves the
    stored ``closed_at`` timestamp untouched. Responds with 404 when the
    session is not accessible to ``user_id``.
    """
    session = ensure_session_access(session_id, user_id)
    if session["status"] != "closed":
        # Only stamp closed_at on the first close so the original time survives.
        storage.update_session(session_id, status="closed", closed_at=utcnow_iso())
    # Always ask the manager to drop the runtime; it tolerates unknown ids.
    session_manager.close(session_id)
    return {"session": storage.get_session(session_id, user_id)}
@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """Return all tasks stored for ``user_id``."""
    owned_tasks = storage.list_tasks(user_id)
    return {"tasks": owned_tasks}
@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task record; 404 if not accessible to ``user_id``."""
    return {"task": ensure_task_access(task_id, user_id)}
@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Send the task's report file as a ``text/markdown`` file response.

    Responds with 404 when the task is not accessible, has no report path
    recorded, or the recorded file does not exist.
    """
    report_path = ensure_task_access(task_id, user_id).get("report_file_path")
    available = bool(report_path) and os.path.exists(report_path)
    if not available:
        raise HTTPException(status_code=404, detail="Report not available")
    download_name = os.path.basename(report_path)
    return FileResponse(report_path, media_type="text/markdown", filename=download_name)
@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return the task's report text and file name as JSON.

    Responds with 404 when the task is not accessible, has no report path
    recorded, or the recorded file does not exist. The file is read as UTF-8.
    """
    report_path = ensure_task_access(task_id, user_id).get("report_file_path")
    available = bool(report_path) and os.path.exists(report_path)
    if not available:
        raise HTTPException(status_code=404, detail="Report not available")
    with open(report_path, encoding="utf-8") as handle:
        body = handle.read()
    return {"content": body, "filename": os.path.basename(report_path)}
@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in the task's session output directory.

    Each entry carries the file name, its size in bytes, whether the name has
    an image extension, and a download URL. Names are sorted; sub-directories
    are skipped. Returns an empty list when the task has no output directory.
    Responds with 404 when the task is not accessible to ``user_id``.
    """
    # Local import: percent-encoding helpers are only needed here.
    from urllib.parse import quote, urlencode

    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
    # File names and user ids may contain spaces, "#", "?" or non-ASCII
    # characters; encode them so the URL stays valid when used as-is by clients.
    query_string = urlencode({"user_id": user_id})
    task_segment = quote(task_id, safe="")
    artifacts = []
    for name in sorted(os.listdir(session_output_dir)):
        path = os.path.join(session_output_dir, name)
        if not os.path.isfile(path):
            continue
        artifacts.append(
            {
                "name": name,
                "size": os.path.getsize(path),
                "is_image": name.lower().endswith(image_suffixes),
                "url": f"/tasks/{task_segment}/artifacts/{quote(name, safe='')}?{query_string}",
            }
        )
    return {"artifacts": artifacts}
@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Send one artifact file from the task's session output directory.

    Raises:
        HTTPException: 404 when the task is not accessible, has no output
            directory, or the artifact is not an existing regular file;
            400 when the resolved path is not strictly inside the output
            directory.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(status_code=404, detail="Artifact directory not available")

    session_root = os.path.realpath(session_output_dir)
    artifact_path = os.path.realpath(os.path.join(session_output_dir, artifact_name))
    # Resolve symlinks and ".." first, then require a path strictly below the
    # root. The root itself is rejected too: it is a directory, not an artifact.
    if not artifact_path.startswith(session_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid artifact path")
    # isfile (not exists): a sub-directory cannot be sent as a file response.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))

66
webapp/session_manager.py Normal file
View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""
In-memory registry for long-lived analysis sessions.
"""
import os
import threading
from dataclasses import dataclass, field
from typing import Dict, Optional
from config.llm_config import LLMConfig
from data_analysis_agent import DataAnalysisAgent
@dataclass
class RuntimeSession:
    """In-memory state kept for one analysis session."""

    # Identifier of the session this runtime belongs to.
    session_id: str
    # Identifier of the user recorded when the runtime was created.
    user_id: str
    # Output directory recorded for the session.
    session_output_dir: str
    # Uploaded files recorded for the session.
    # NOTE(review): presumably filesystem paths rather than ids; confirm against the caller.
    uploaded_files: list[str]
    # Optional template recorded for the session.
    template_path: Optional[str]
    # Agent instance held for the lifetime of the runtime.
    agent: DataAnalysisAgent
    # Starts False; not modified anywhere in this module.
    initialized: bool = False
    # One lock per runtime, created fresh for every instance.
    lock: threading.Lock = field(default_factory=threading.Lock)
class SessionManager:
    """Registry that keeps one ``RuntimeSession`` (and agent) per session id."""

    def __init__(self, outputs_dir: str):
        """Start with an empty registry rooted at the absolute ``outputs_dir``."""
        self._lock = threading.Lock()
        self._sessions: Dict[str, RuntimeSession] = {}
        self.outputs_dir = os.path.abspath(outputs_dir)

    def get_or_create(
        self,
        session_id: str,
        user_id: str,
        session_output_dir: str,
        uploaded_files: list[str],
        template_path: Optional[str],
    ) -> RuntimeSession:
        """Return the runtime registered for ``session_id``, creating it if absent.

        For an already registered id the existing runtime is returned as-is and
        the remaining arguments are ignored.
        """
        with self._lock:
            existing = self._sessions.get(session_id)
            if existing is not None:
                return existing

            # First use of this session: build its agent and register it.
            agent = DataAnalysisAgent(
                llm_config=LLMConfig(),
                output_dir=self.outputs_dir,
                max_rounds=20,
                force_max_rounds=False,
            )
            created = RuntimeSession(
                session_id=session_id,
                user_id=user_id,
                session_output_dir=session_output_dir,
                uploaded_files=uploaded_files,
                template_path=template_path,
                agent=agent,
            )
            self._sessions[session_id] = created
            return created

    def close(self, session_id: str) -> None:
        """Forget the runtime for ``session_id`` and close its agent session.

        Unknown ids are ignored.
        """
        with self._lock:
            removed = self._sessions.pop(session_id, None)
        if not removed:
            return
        removed.agent.close_session()

299
webapp/static/app.js Normal file
View File

@@ -0,0 +1,299 @@
// Client-side state shared by every function in this script.
const state = {
  // Visitor identifier, filled in by ensureUserId().
  userId: null,
  // File records from the last GET /files response.
  files: [],
  // Session records from the last GET /sessions response.
  sessions: [],
  // Id of the session shown in the detail pane, if any.
  currentSessionId: null,
  // Id of the task whose report was last requested, if any.
  currentTaskId: null,
  // Handle returned by setInterval in startPolling().
  pollTimer: null,
};
/**
 * Load the visitor id from localStorage, creating and persisting a new
 * `guest_<id>` value on first visit, then publish it to `state.userId` and
 * the `#user-id` element.
 */
function ensureUserId() {
  const key = "vibe_data_ana_user_id";
  let userId = localStorage.getItem(key);
  if (!userId) {
    // crypto.randomUUID only exists in secure contexts (HTTPS / localhost).
    // Fall back to a time + random token so plain-HTTP deployments still boot.
    const hasRandomUuid =
      typeof crypto !== "undefined" && typeof crypto.randomUUID === "function";
    const token = hasRandomUuid
      ? crypto.randomUUID()
      : `${Date.now().toString(16)}-${Math.random().toString(16).slice(2)}`;
    userId = `guest_${token}`;
    localStorage.setItem(key, userId);
  }
  state.userId = userId;
  document.getElementById("user-id").textContent = userId;
}
/**
 * Fetch `path` and return the parsed JSON body.
 *
 * An empty body yields `{}`; a body that is not valid JSON yields
 * `{ raw: <text> }`. A non-2xx response throws an Error whose message is
 * taken from `detail`, then `error`, then the HTTP status text.
 */
async function api(path, options = {}) {
  const response = await fetch(path, options);
  const text = await response.text();
  let data = {};
  try {
    data = text ? JSON.parse(text) : {};
  } catch {
    data = { raw: text };
  }
  if (!response.ok) {
    const detail = data.detail || data.error || response.statusText;
    let message = detail;
    if (Array.isArray(detail)) {
      // FastAPI validation errors (422) send `detail` as a list of objects;
      // without this the user would see "[object Object]".
      message = detail
        .map((item) => (item && item.msg ? item.msg : JSON.stringify(item)))
        .join("; ");
    } else if (typeof detail !== "string") {
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }
  return data;
}
/** Set the text of element `id`; any falsy `value` clears it. */
function setText(id, value) {
  const target = document.getElementById(id);
  target.textContent = value ? value : "";
}
/**
 * Re-render the uploaded-file list and the file picker of the new-session
 * form from `state.files`.
 *
 * File names come from user uploads, so they are inserted as text nodes
 * rather than HTML to prevent markup injection.
 */
function renderFiles() {
  const fileList = document.getElementById("file-list");
  const picker = document.getElementById("session-file-picker");
  fileList.innerHTML = "";
  picker.innerHTML = "";
  if (!state.files.length) {
    fileList.innerHTML = '<div class="empty">还没有上传文件。</div>';
    picker.innerHTML = '<div class="empty">先上传文件后才能创建会话。</div>';
    return;
  }
  state.files.forEach((file) => {
    // Entry in the plain file list: name plus id hint.
    const item = document.createElement("div");
    item.className = "file-item";
    const name = document.createElement("strong");
    name.textContent = file.original_name;
    const hint = document.createElement("div");
    hint.className = "hint";
    hint.textContent = file.id;
    item.append(name, hint);
    fileList.appendChild(item);

    // Matching checkbox in the session form; its value is the file id.
    const label = document.createElement("label");
    label.className = "checkbox-item";
    const checkbox = document.createElement("input");
    checkbox.type = "checkbox";
    checkbox.value = file.id;
    const caption = document.createElement("span");
    caption.textContent = file.original_name;
    label.append(checkbox, caption);
    picker.appendChild(label);
  });
}
/** Build the HTML for a status pill; `status` is both CSS class and label. */
function statusBadge(status) {
  const openTag = `<span class="status ${status}">`;
  return `${openTag}${status}</span>`;
}
/**
 * Re-render the session list from `state.sessions`, highlighting the
 * session whose id equals `state.currentSessionId`. Clicking a card loads
 * that session's detail.
 *
 * Titles are user input and are therefore written with textContent instead
 * of being interpolated into HTML.
 */
function renderSessions() {
  const container = document.getElementById("session-list");
  container.innerHTML = "";
  if (!state.sessions.length) {
    container.innerHTML = '<div class="empty">暂无会话。</div>';
    return;
  }
  state.sessions.forEach((session) => {
    const card = document.createElement("button");
    card.type = "button";
    card.className = `session-card ${session.id === state.currentSessionId ? "active" : ""}`;

    const titleRow = document.createElement("div");
    const title = document.createElement("strong");
    title.textContent = session.title;
    titleRow.appendChild(title);

    const idRow = document.createElement("div");
    idRow.className = "hint";
    idRow.textContent = session.id;

    // The badge markup is produced by statusBadge() from the session status.
    const badgeRow = document.createElement("div");
    badgeRow.innerHTML = statusBadge(session.status);

    card.append(titleRow, idRow, badgeRow);
    card.onclick = () => loadSessionDetail(session.id);
    container.appendChild(card);
  });
}
/**
 * Render `tasks` into the task list, highlighting the task whose id equals
 * `state.currentTaskId`. Clicking a card loads that task's report.
 *
 * Queries are user input and are therefore written with textContent instead
 * of being interpolated into HTML.
 */
function renderTasks(tasks) {
  const container = document.getElementById("task-list");
  container.innerHTML = "";
  if (!tasks.length) {
    container.innerHTML = '<div class="empty">当前会话还没有专题任务。</div>';
    return;
  }
  tasks.forEach((task) => {
    const card = document.createElement("button");
    card.type = "button";
    card.className = `task-card ${task.id === state.currentTaskId ? "active" : ""}`;

    const queryRow = document.createElement("div");
    const query = document.createElement("strong");
    query.textContent = task.query;
    queryRow.appendChild(query);

    const createdRow = document.createElement("div");
    createdRow.className = "hint";
    createdRow.textContent = task.created_at;

    // The badge markup is produced by statusBadge() from the task status.
    const badgeRow = document.createElement("div");
    badgeRow.innerHTML = statusBadge(task.status);

    card.append(queryRow, createdRow, badgeRow);
    card.onclick = () => loadTaskReport(task.id);
    container.appendChild(card);
  });
}
/** Reload the user's uploaded files into `state.files` and re-render them. */
async function refreshFiles() {
  const owner = encodeURIComponent(state.userId);
  const { files } = await api(`/files?user_id=${owner}`);
  state.files = files || [];
  renderFiles();
}
/**
 * Reload the session list and re-render it.
 *
 * With `selectSessionId`, that session's detail is loaded afterwards.
 * Otherwise, if a session is currently open and still present in the fresh
 * list, its detail is reloaded without re-rendering the report.
 */
async function refreshSessions(selectSessionId = null) {
  const owner = encodeURIComponent(state.userId);
  const listing = await api(`/sessions?user_id=${owner}`);
  state.sessions = listing.sessions || [];
  renderSessions();

  if (selectSessionId) {
    await loadSessionDetail(selectSessionId);
    return;
  }
  if (!state.currentSessionId) {
    return;
  }
  const stillListed = state.sessions.some((entry) => entry.id === state.currentSessionId);
  if (stillListed) {
    await loadSessionDetail(state.currentSessionId, false);
  }
}
/**
 * Load one session with its tasks and show it in the detail pane.
 *
 * When at least one task has succeeded and `renderReport` is true, the
 * report of the last succeeded task in the list is loaded. When no task has
 * succeeded, the report area is reset to its placeholder text.
 */
async function loadSessionDetail(sessionId, renderReport = true) {
  const owner = encodeURIComponent(state.userId);
  const detail = await api(`/sessions/${sessionId}?user_id=${owner}`);
  state.currentSessionId = sessionId;

  const current = detail.session;
  document.getElementById("detail-title").textContent = current.title;
  document.getElementById("detail-meta").textContent = `${current.id} · ${current.status}`;
  renderSessions();

  const tasks = detail.tasks || [];
  renderTasks(tasks);

  // Walk backwards so the last succeeded entry in the list wins.
  let latestDoneTask;
  for (let index = tasks.length - 1; index >= 0; index -= 1) {
    if (tasks[index].status === "succeeded") {
      latestDoneTask = tasks[index];
      break;
    }
  }

  if (!latestDoneTask) {
    setText("report-title", "暂无已完成专题");
    setText("report-content", "当前会话还没有可展示的报告。");
    document.getElementById("artifact-gallery").innerHTML = "";
    return;
  }
  if (renderReport) {
    await loadTaskReport(latestDoneTask.id);
  }
}
/**
 * Show one task in the report pane.
 *
 * A task that has not succeeded shows its status and error message and
 * clears the gallery. A succeeded task shows its report text and artifacts.
 */
async function loadTaskReport(taskId) {
  state.currentTaskId = taskId;
  renderSessions();

  const owner = `user_id=${encodeURIComponent(state.userId)}`;
  const { task } = await api(`/tasks/${taskId}?${owner}`);
  setText("report-title", task.query);

  const succeeded = task.status === "succeeded";
  if (!succeeded) {
    setText("report-content", `当前任务状态为 ${task.status}\n错误信息:${task.error_message || "暂无"}`);
    document.getElementById("artifact-gallery").innerHTML = "";
    return;
  }

  const report = await api(`/tasks/${taskId}/report/content?${owner}`);
  setText("report-content", report.content || "");
  const listing = await api(`/tasks/${taskId}/artifacts?${owner}`);
  renderArtifacts(listing.artifacts || []);
}
/**
 * Render the image artifacts of a task as preview cards with a link.
 * Non-image artifacts are ignored; an empty-state message is shown when
 * there are no images.
 *
 * Names and URLs are assigned through DOM properties instead of being
 * interpolated into HTML, so unusual file names cannot break the markup.
 */
function renderArtifacts(artifacts) {
  const gallery = document.getElementById("artifact-gallery");
  gallery.innerHTML = "";
  const images = artifacts.filter((item) => item.is_image);
  if (!images.length) {
    gallery.innerHTML = '<div class="empty">当前任务没有图片产物。</div>';
    return;
  }
  images.forEach((artifact) => {
    const card = document.createElement("div");
    card.className = "artifact-card";

    const preview = document.createElement("img");
    preview.src = artifact.url;
    preview.alt = artifact.name;

    const linkRow = document.createElement("div");
    const link = document.createElement("a");
    link.href = artifact.url;
    link.target = "_blank";
    link.rel = "noreferrer";
    link.textContent = artifact.name;
    linkRow.appendChild(link);

    card.append(preview, linkRow);
    gallery.appendChild(card);
  });
}
/**
 * Submit handler of the upload form: send the chosen files as multipart
 * form data, report progress in `#upload-status`, then refresh the list.
 */
async function handleUpload(event) {
  event.preventDefault();
  const input = document.getElementById("upload-input");
  const chosen = Array.from(input.files);
  if (chosen.length === 0) {
    setText("upload-status", "请选择至少一个文件。");
    return;
  }

  const body = new FormData();
  body.append("user_id", state.userId);
  for (const file of chosen) {
    body.append("files", file);
  }

  setText("upload-status", "上传中...");
  await api("/files/upload", { method: "POST", body });
  setText("upload-status", "文件上传完成。");
  input.value = "";
  await refreshFiles();
}
/**
 * Submit handler of the new-session form: create a session from the ticked
 * files, the title and the first query, then select the new session.
 */
async function handleCreateSession(event) {
  event.preventDefault();
  const ticked = document.querySelectorAll("#session-file-picker input:checked");
  const fileIds = Array.from(ticked, (box) => box.value);
  if (fileIds.length === 0) {
    setText("session-status", "请至少选择一个文件。");
    return;
  }

  const fieldValue = (id) => document.getElementById(id).value.trim();
  const requestBody = JSON.stringify({
    user_id: state.userId,
    title: fieldValue("session-title"),
    query: fieldValue("session-query"),
    file_ids: fileIds,
  });

  setText("session-status", "会话创建中...");
  const created = await api("/sessions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: requestBody,
  });
  setText("session-status", "会话已创建,正在执行首个专题。");
  document.getElementById("session-form").reset();
  await refreshSessions(created.session.id);
}
/**
 * Queue a follow-up topic on the current session from the follow-up
 * textarea. Does nothing for an empty query; asks the user to pick a
 * session when none is selected.
 */
async function handleFollowup() {
  const sessionId = state.currentSessionId;
  if (!sessionId) {
    setText("detail-meta", "请先选择一个会话。");
    return;
  }

  const box = document.getElementById("followup-query");
  const query = box.value.trim();
  if (query) {
    await api(`/sessions/${sessionId}/topics`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ user_id: state.userId, query }),
    });
    document.getElementById("followup-query").value = "";
    await refreshSessions(state.currentSessionId);
  }
}
/** Close the currently selected session (if any) and refresh the view. */
async function handleCloseSession() {
  const sessionId = state.currentSessionId;
  if (sessionId) {
    const owner = encodeURIComponent(state.userId);
    await api(`/sessions/${sessionId}/close?user_id=${owner}`, { method: "POST" });
    await refreshSessions(state.currentSessionId);
  }
}
/**
 * (Re)start the background refresh of the session list every 8 seconds.
 * Any previous timer is cleared first; refresh errors are only logged.
 */
function startPolling() {
  const intervalMs = 8000;
  if (state.pollTimer) {
    clearInterval(state.pollTimer);
  }
  const tick = () => {
    refreshSessions().catch((error) => console.error(error));
  };
  state.pollTimer = setInterval(tick, intervalMs);
}
/**
 * Page entry point: establish the visitor id, wire up all form and button
 * handlers, load the initial data and start polling.
 *
 * Handler failures are shown in the status element next to the control;
 * failures of the manual refresh button are only logged.
 */
async function bootstrap() {
  ensureUserId();

  const byId = (id) => document.getElementById(id);
  // Build an error handler that prints the message into element `targetId`.
  const showIn = (targetId) => (error) => setText(targetId, error.message);

  byId("upload-form").addEventListener("submit", (event) => {
    handleUpload(event).catch(showIn("upload-status"));
  });
  byId("session-form").addEventListener("submit", (event) => {
    handleCreateSession(event).catch(showIn("session-status"));
  });
  byId("submit-followup").onclick = () => {
    handleFollowup().catch(showIn("detail-meta"));
  };
  byId("close-session").onclick = () => {
    handleCloseSession().catch(showIn("detail-meta"));
  };
  byId("refresh-sessions").onclick = () => {
    refreshSessions().catch((error) => console.error(error));
  };

  await refreshFiles();
  await refreshSessions();
  startPolling();
}
// Start the app; a failure during start-up is logged and its message is
// shown in the detail pane's meta line.
bootstrap().catch((error) => {
  console.error(error);
  setText("detail-meta", error.message);
});

88
webapp/static/index.html Normal file
View File

@@ -0,0 +1,88 @@
<!DOCTYPE html>
<!-- Single-page front end; element ids below are looked up by /static/app.js. -->
<html lang="zh-CN">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Vibe Data Analysis</title>
  <link rel="stylesheet" href="/static/style.css" />
</head>
<body>
  <main class="app-shell">
    <!-- Header: product blurb plus the visitor id card (#user-id). -->
    <section class="hero">
      <div>
        <p class="eyebrow">Vibe Data Analysis</p>
        <h1>在线数据分析会话</h1>
        <p class="subtle">
          上传文件,发起分析,会话结束后继续追问新的专题,不中断当前上下文。
        </p>
      </div>
      <div class="identity-card">
        <span>当前访客标识</span>
        <code id="user-id"></code>
      </div>
    </section>
    <!-- Step panels: file upload (left) and session creation (right). -->
    <section class="panel-grid">
      <section class="panel">
        <h2>1. 上传文件</h2>
        <form id="upload-form" class="stack-form">
          <input id="upload-input" type="file" multiple />
          <button type="submit">上传并登记</button>
        </form>
        <!-- Upload progress / error text and the rendered file list. -->
        <div id="upload-status" class="hint"></div>
        <div id="file-list" class="file-list"></div>
      </section>
      <section class="panel">
        <h2>2. 新建分析会话</h2>
        <form id="session-form" class="stack-form">
          <input id="session-title" type="text" placeholder="会话标题,例如:工单健康度" required />
          <textarea id="session-query" rows="5" placeholder="输入首个分析专题,例如:请先整体评估工单健康度,并指出最需要关注的问题。" required></textarea>
          <!-- Filled by the script with one checkbox per uploaded file. -->
          <div id="session-file-picker" class="checkbox-list"></div>
          <button type="submit">创建会话并开始分析</button>
        </form>
        <div id="session-status" class="hint"></div>
      </section>
    </section>
    <!-- Main workspace: session sidebar plus the detail pane. -->
    <section class="layout">
      <aside class="sidebar panel">
        <div class="sidebar-header">
          <h2>会话列表</h2>
          <button id="refresh-sessions" type="button">刷新</button>
        </div>
        <div id="session-list" class="session-list"></div>
      </aside>
      <section class="content panel">
        <div class="content-header">
          <div>
            <h2 id="detail-title">未选择会话</h2>
            <p id="detail-meta" class="hint">选择左侧会话查看分析结果与后续专题。</p>
          </div>
          <button id="close-session" type="button" class="ghost">结束当前会话</button>
        </div>
        <!-- Follow-up topic input for the selected session. -->
        <div class="followup-box">
          <textarea id="followup-query" rows="4" placeholder="如果还有新的专题想继续分析,在这里输入。"></textarea>
          <button id="submit-followup" type="button">继续分析该专题</button>
        </div>
        <!-- Task list (left) and report text with image artifacts (right). -->
        <div class="tasks-area">
          <div class="tasks-column">
            <h3>专题任务</h3>
            <div id="task-list" class="task-list"></div>
          </div>
          <div class="report-column">
            <h3>报告展示</h3>
            <div id="report-title" class="report-title">暂无报告</div>
            <pre id="report-content" class="report-content">选择一个已完成任务查看报告。</pre>
            <div id="artifact-gallery" class="artifact-gallery"></div>
          </div>
        </div>
      </section>
    </section>
  </main>
  <script src="/static/app.js"></script>
</body>
</html>

278
webapp/static/style.css Normal file
View File

@@ -0,0 +1,278 @@
/* Design tokens: warm paper palette shared by every rule below. */
:root {
  --bg: #f4efe7;
  --panel: #fffaf2;
  --line: #d8cdbd;
  --text: #1f1a17;
  --muted: #6e645a;
  --accent: #b04a2f;
  --accent-soft: #f2d5c4;
  --success: #2f7d62;
  --warning: #aa6a1f;
}
/* Size every element by its border box. */
* {
  box-sizing: border-box;
}
/* Page background: a soft highlight in the top-left over a vertical gradient. */
body {
  margin: 0;
  font-family: "IBM Plex Sans", "Noto Sans SC", sans-serif;
  color: var(--text);
  background:
    radial-gradient(circle at top left, #fff7ec 0, transparent 28rem),
    linear-gradient(180deg, #efe5d7 0%, var(--bg) 100%);
}
/* Centered page container. */
.app-shell {
  max-width: 1440px;
  margin: 0 auto;
  padding: 24px;
}
/* Shared card look for the header and all panels. */
.hero,
.panel,
.content,
.sidebar {
  border: 1px solid var(--line);
  background: rgba(255, 250, 242, 0.96);
  backdrop-filter: blur(10px);
  border-radius: 20px;
  box-shadow: 0 12px 40px rgba(93, 67, 39, 0.08);
}
/* Header row: blurb on the left, identity card on the right. */
.hero {
  display: flex;
  justify-content: space-between;
  gap: 24px;
  padding: 24px 28px;
  margin-bottom: 20px;
}
/* Small uppercase label above the main heading. */
.eyebrow {
  margin: 0 0 10px;
  color: var(--accent);
  text-transform: uppercase;
  letter-spacing: 0.12em;
  font-size: 12px;
}
/* Drop the default top margin of headings and paragraphs. */
h1,
h2,
h3,
p {
  margin-top: 0;
}
/* Secondary text. */
.subtle,
.hint {
  color: var(--muted);
}
/* Visitor id card in the header. */
.identity-card {
  min-width: 220px;
  padding: 16px;
  border-radius: 16px;
  background: linear-gradient(135deg, var(--accent-soft), #f8e7dc);
  display: flex;
  flex-direction: column;
  gap: 8px;
}
/* Grid containers; their column templates follow. */
.panel-grid,
.layout,
.tasks-area {
  display: grid;
  gap: 20px;
}
/* Two equal columns for the upload and new-session panels. */
.panel-grid {
  grid-template-columns: repeat(2, minmax(0, 1fr));
  margin-bottom: 20px;
}
/* Fixed-width sidebar next to a flexible content column. */
.layout {
  grid-template-columns: 340px minmax(0, 1fr);
}
/* Fixed-width task column next to a flexible report column. */
.tasks-area {
  grid-template-columns: 320px minmax(0, 1fr);
}
/* Inner padding of panels. */
.panel,
.content,
.sidebar {
  padding: 20px;
}
/* Forms stack their controls vertically. */
.stack-form {
  display: grid;
  gap: 12px;
}
/* Form controls inherit the page font instead of the browser default. */
input,
textarea,
button {
  font: inherit;
}
/* Text inputs and textareas. */
input,
textarea {
  width: 100%;
  padding: 12px 14px;
  border-radius: 12px;
  border: 1px solid var(--line);
  background: #fffdf9;
}
/* Primary pill button. */
button {
  border: 0;
  border-radius: 999px;
  padding: 12px 18px;
  background: var(--accent);
  color: #fff;
  cursor: pointer;
  transition: transform 120ms ease, opacity 120ms ease;
}
/* Slight lift on hover. */
button:hover {
  transform: translateY(-1px);
  opacity: 0.95;
}
/* Outlined secondary button (used for "close session"). */
button.ghost {
  background: transparent;
  color: var(--accent);
  border: 1px solid var(--accent-soft);
}
/* Vertical lists rendered by the script. */
.file-list,
.checkbox-list,
.session-list,
.task-list,
.artifact-gallery {
  display: grid;
  gap: 10px;
}
/* Cards inside those lists. */
.file-item,
.session-card,
.task-card,
.artifact-card {
  padding: 12px 14px;
  border-radius: 14px;
  border: 1px solid var(--line);
  background: #fffdf8;
}
/* One row of the file picker: checkbox plus file name. */
.checkbox-item {
  display: flex;
  align-items: center;
  gap: 10px;
  padding: 10px 12px;
  border-radius: 12px;
  border: 1px solid var(--line);
  background: #fffdf8;
}
/* Header rows: title on the left, action button on the right. */
.sidebar-header,
.content-header {
  display: flex;
  justify-content: space-between;
  align-items: start;
  gap: 16px;
}
/* Follow-up topic input block. */
.followup-box {
  margin: 20px 0;
  display: grid;
  gap: 12px;
}
/* Highlight for the selected session / task card. */
.session-card.active,
.task-card.active {
  border-color: var(--accent);
  background: #fff3eb;
}
/* Status pill; the second class is the status value itself. */
.status {
  display: inline-flex;
  padding: 4px 10px;
  border-radius: 999px;
  font-size: 12px;
  font-weight: 600;
}
.status.queued {
  background: #f1ead7;
  color: #7b6114;
}
.status.running {
  background: #e6edf9;
  color: #1e5dab;
}
/* Positive states share the green pill. */
.status.succeeded,
.status.open {
  background: #dff1ea;
  color: var(--success);
}
/* Terminal / negative states share the red pill. */
.status.failed,
.status.closed {
  background: #f8dfda;
  color: #a03723;
}
/* Heading above the report body. */
.report-title {
  margin-bottom: 10px;
  color: var(--muted);
}
/* Dark scrollable report viewer; long lines wrap, line breaks are kept. */
.report-content {
  min-height: 360px;
  max-height: 720px;
  overflow: auto;
  padding: 16px;
  border-radius: 16px;
  background: #1d1a18;
  color: #f8f2ea;
  line-height: 1.6;
  white-space: pre-wrap;
}
/* Image gallery: as many columns of at least 220px as fit. */
.artifact-gallery {
  margin-top: 16px;
  grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
}
/* Fixed-height, cropped preview image. */
.artifact-card img {
  width: 100%;
  height: 180px;
  object-fit: cover;
  border-radius: 12px;
  background: #ede3d7;
}
.artifact-card a {
  color: var(--accent);
  text-decoration: none;
}
/* Empty-state placeholder text. */
.empty {
  color: var(--muted);
  font-style: italic;
}
/* At 1024px and below, collapse every multi-column layout to one column. */
@media (max-width: 1024px) {
  .panel-grid,
  .layout,
  .tasks-area,
  .hero {
    grid-template-columns: 1fr;
  }
  /* The header is a flex row, so it is stacked via flex-direction. */
  .hero {
    flex-direction: column;
  }
}

311
webapp/storage.py Normal file
View File

@@ -0,0 +1,311 @@
# -*- coding: utf-8 -*-
"""
SQLite-backed storage for uploaded files and analysis tasks.
"""
import json
import os
import sqlite3
import threading
import uuid
from contextlib import contextmanager
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional
def utcnow_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``.

    The value has second precision and a literal ``Z`` suffix.
    """
    # datetime.utcnow() is deprecated since Python 3.12; take an aware "now"
    # and drop tzinfo so isoformat() emits no "+00:00" before the "Z".
    from datetime import timezone

    now = datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
    return now.isoformat() + "Z"
class Storage:
"""Simple SQLite storage with thread-safe write operations."""
def __init__(self, db_path: str):
self.db_path = os.path.abspath(db_path)
os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
self._write_lock = threading.Lock()
self.init_db()
@contextmanager
def _connect(self):
conn = sqlite3.connect(self.db_path, check_same_thread=False)
conn.row_factory = sqlite3.Row
try:
yield conn
conn.commit()
finally:
conn.close()
def init_db(self) -> None:
with self._connect() as conn:
conn.executescript(
"""
CREATE TABLE IF NOT EXISTS uploaded_files (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
original_name TEXT NOT NULL,
stored_path TEXT NOT NULL,
created_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS analysis_sessions (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
title TEXT NOT NULL,
status TEXT NOT NULL,
uploaded_file_ids TEXT NOT NULL,
template_file_id TEXT,
session_output_dir TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
closed_at TEXT
);
CREATE TABLE IF NOT EXISTS analysis_tasks (
id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
user_id TEXT NOT NULL,
query TEXT NOT NULL,
status TEXT NOT NULL,
uploaded_file_ids TEXT NOT NULL,
template_file_id TEXT,
session_output_dir TEXT,
report_file_path TEXT,
error_message TEXT,
created_at TEXT NOT NULL,
started_at TEXT,
finished_at TEXT
);
"""
)
def create_uploaded_file(
self, user_id: str, original_name: str, stored_path: str
) -> Dict[str, Any]:
record = {
"id": str(uuid.uuid4()),
"user_id": user_id,
"original_name": original_name,
"stored_path": os.path.abspath(stored_path),
"created_at": utcnow_iso(),
}
with self._write_lock, self._connect() as conn:
conn.execute(
"""
INSERT INTO uploaded_files (id, user_id, original_name, stored_path, created_at)
VALUES (:id, :user_id, :original_name, :stored_path, :created_at)
""",
record,
)
return record
def get_uploaded_file(self, file_id: str, user_id: str) -> Optional[Dict[str, Any]]:
with self._connect() as conn:
row = conn.execute(
"""
SELECT * FROM uploaded_files WHERE id = ? AND user_id = ?
""",
(file_id, user_id),
).fetchone()
return dict(row) if row else None
def list_uploaded_files(
self, file_ids: Iterable[str], user_id: str
) -> List[Dict[str, Any]]:
file_ids = list(file_ids)
if not file_ids:
return []
placeholders = ",".join("?" for _ in file_ids)
params = [*file_ids, user_id]
with self._connect() as conn:
rows = conn.execute(
f"""
SELECT * FROM uploaded_files
WHERE id IN ({placeholders}) AND user_id = ?
ORDER BY created_at ASC
""",
params,
).fetchall()
return [dict(row) for row in rows]
def list_all_uploaded_files(self, user_id: str) -> List[Dict[str, Any]]:
with self._connect() as conn:
rows = conn.execute(
"""
SELECT * FROM uploaded_files
WHERE user_id = ?
ORDER BY created_at DESC
""",
(user_id,),
).fetchall()
return [dict(row) for row in rows]
def create_task(
self,
session_id: str,
user_id: str,
query: str,
uploaded_file_ids: List[str],
template_file_id: Optional[str] = None,
) -> Dict[str, Any]:
record = {
"id": str(uuid.uuid4()),
"session_id": session_id,
"user_id": user_id,
"query": query,
"status": "queued",
"uploaded_file_ids": json.dumps(uploaded_file_ids, ensure_ascii=False),
"template_file_id": template_file_id,
"session_output_dir": None,
"report_file_path": None,
"error_message": None,
"created_at": utcnow_iso(),
"started_at": None,
"finished_at": None,
}
with self._write_lock, self._connect() as conn:
conn.execute(
"""
INSERT INTO analysis_tasks (
id, session_id, user_id, query, status, uploaded_file_ids, template_file_id,
session_output_dir, report_file_path, error_message,
created_at, started_at, finished_at
)
VALUES (
:id, :session_id, :user_id, :query, :status, :uploaded_file_ids, :template_file_id,
:session_output_dir, :report_file_path, :error_message,
:created_at, :started_at, :finished_at
)
""",
record,
)
return self.get_task(record["id"], user_id)
def get_task(self, task_id: str, user_id: str) -> Optional[Dict[str, Any]]:
with self._connect() as conn:
row = conn.execute(
"""
SELECT * FROM analysis_tasks WHERE id = ? AND user_id = ?
""",
(task_id, user_id),
).fetchone()
return self._normalize_task(row) if row else None
def list_tasks(self, user_id: str) -> List[Dict[str, Any]]:
with self._connect() as conn:
rows = conn.execute(
"""
SELECT * FROM analysis_tasks
WHERE user_id = ?
ORDER BY created_at DESC
""",
(user_id,),
).fetchall()
return [self._normalize_task(row) for row in rows]
def list_session_tasks(self, session_id: str, user_id: str) -> List[Dict[str, Any]]:
with self._connect() as conn:
rows = conn.execute(
"""
SELECT * FROM analysis_tasks
WHERE session_id = ? AND user_id = ?
ORDER BY created_at ASC
""",
(session_id, user_id),
).fetchall()
return [self._normalize_task(row) for row in rows]
def create_session(
self,
user_id: str,
title: str,
uploaded_file_ids: List[str],
template_file_id: Optional[str] = None,
) -> Dict[str, Any]:
now = utcnow_iso()
record = {
"id": str(uuid.uuid4()),
"user_id": user_id,
"title": title,
"status": "open",
"uploaded_file_ids": json.dumps(uploaded_file_ids, ensure_ascii=False),
"template_file_id": template_file_id,
"session_output_dir": None,
"created_at": now,
"updated_at": now,
"closed_at": None,
}
with self._write_lock, self._connect() as conn:
conn.execute(
"""
INSERT INTO analysis_sessions (
id, user_id, title, status, uploaded_file_ids, template_file_id,
session_output_dir, created_at, updated_at, closed_at
)
VALUES (
:id, :user_id, :title, :status, :uploaded_file_ids, :template_file_id,
:session_output_dir, :created_at, :updated_at, :closed_at
)
""",
record,
)
return self.get_session(record["id"], user_id)
def get_session(self, session_id: str, user_id: str) -> Optional[Dict[str, Any]]:
with self._connect() as conn:
row = conn.execute(
"""
SELECT * FROM analysis_sessions WHERE id = ? AND user_id = ?
""",
(session_id, user_id),
).fetchone()
return self._normalize_session(row) if row else None
def list_sessions(self, user_id: str) -> List[Dict[str, Any]]:
with self._connect() as conn:
rows = conn.execute(
"""
SELECT * FROM analysis_sessions
WHERE user_id = ?
ORDER BY updated_at DESC
""",
(user_id,),
).fetchall()
return [self._normalize_session(row) for row in rows]
def update_session(self, session_id: str, **fields: Any) -> None:
if not fields:
return
fields["updated_at"] = utcnow_iso()
assignments = ", ".join(f"{key} = :{key}" for key in fields.keys())
payload = dict(fields)
payload["id"] = session_id
with self._write_lock, self._connect() as conn:
conn.execute(
f"UPDATE analysis_sessions SET {assignments} WHERE id = :id",
payload,
)
def update_task(self, task_id: str, **fields: Any) -> None:
    """Update columns of one task row.

    Does nothing when no fields are given. Every keyword becomes a
    ``column = :column`` assignment on the row whose id is ``task_id``.

    Args:
        task_id: Id of the row to update.
        **fields: Column name -> new value.
    """
    if len(fields) == 0:
        return
    # Column names are interpolated into the SQL text (only the values are
    # bound as parameters), so keyword names must be trusted column names.
    set_clause = ", ".join(f"{name} = :{name}" for name in fields)
    statement = f"UPDATE analysis_tasks SET {set_clause} WHERE id = :id"
    params = {**fields, "id": task_id}
    with self._write_lock, self._connect() as conn:
        conn.execute(statement, params)
@staticmethod
def _normalize_task(row: sqlite3.Row) -> Dict[str, Any]:
task = dict(row)
task["uploaded_file_ids"] = json.loads(task["uploaded_file_ids"])
return task
@staticmethod
def _normalize_session(row: sqlite3.Row) -> Dict[str, Any]:
session = dict(row)
session["uploaded_file_ids"] = json.loads(session["uploaded_file_ids"])
return session

147
webapp/task_runner.py Normal file
View File

@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*-
"""
Background task runner for analysis jobs.
"""
import os
import shutil
import threading
from concurrent.futures import ThreadPoolExecutor
from contextlib import redirect_stderr, redirect_stdout
from typing import Optional
from utils.create_session_dir import create_session_output_dir
from webapp.session_manager import SessionManager
from webapp.storage import Storage, utcnow_iso
class TaskRunner:
    """Runs analysis tasks in background worker threads.

    Tasks are identified by id, de-duplicated while they are pending or
    running, and executed on a ``ThreadPoolExecutor``. Task and session
    state is persisted through the injected ``storage`` object.
    """

    def __init__(
        self,
        storage: "Storage",
        uploads_dir: str,
        outputs_dir: str,
        session_manager: "SessionManager",
        max_workers: int = 2,
    ):
        """Create the runner and its worker pool.

        Args:
            storage: Persistence layer used to read and update tasks/sessions.
            uploads_dir: Directory holding uploaded files (stored absolute).
            outputs_dir: Root directory for session outputs (stored absolute).
            session_manager: Provides the per-session runtime (agent + lock).
            max_workers: Size of the worker thread pool.
        """
        self.storage = storage
        self.uploads_dir = os.path.abspath(uploads_dir)
        self.outputs_dir = os.path.abspath(outputs_dir)
        self.session_manager = session_manager
        self._executor = ThreadPoolExecutor(max_workers=max_workers)
        # Guards ``_submitted``, the set of task ids queued or running.
        self._lock = threading.Lock()
        self._submitted = set()

    def submit(self, task_id: str, user_id: str) -> None:
        """Queue a task for background execution.

        A task id that is already queued or running is ignored.

        Raises:
            Whatever the executor raises when it cannot accept the job
            (e.g. ``RuntimeError`` after shutdown). In that case the id is
            removed from the pending set so the task can be resubmitted.
        """
        with self._lock:
            if task_id in self._submitted:
                return
            self._submitted.add(task_id)
        try:
            self._executor.submit(self._run_task, task_id, user_id)
        except Exception:
            # The job never reached the pool, so ``_run_task``'s ``finally``
            # will not run; undo the bookkeeping here.
            with self._lock:
                self._submitted.discard(task_id)
            raise

    def _run_task(self, task_id: str, user_id: str) -> None:
        """Execute one analysis task end to end (worker-thread entry point).

        Loads the task and its session, resolves input files, ensures the
        session has an output directory, runs the session's agent with
        stdout/stderr redirected to ``task.log``, and records the outcome.
        Returns silently when the task or session cannot be found.

        Any exception raised outside the agent call (setup, report
        persistence, bookkeeping) marks the task as failed before being
        re-raised; the Future returned by the executor is not inspected by
        ``submit``, so without this the task would stay in a non-terminal
        state with no recorded error.
        """
        finalized = False  # True once a terminal task status was stored.
        running_session_id: Optional[str] = None  # Set once session is "running".
        try:
            task = self.storage.get_task(task_id, user_id)
            if not task:
                return
            session = self.storage.get_session(task["session_id"], user_id)
            if not session:
                return

            uploaded_files = self.storage.list_uploaded_files(
                task["uploaded_file_ids"], user_id
            )
            data_files = [item["stored_path"] for item in uploaded_files]
            template_path = self._resolve_template_path(task, user_id)

            session_output_dir = session.get("session_output_dir")
            if not session_output_dir:
                # First task of the session: allocate and persist its directory.
                session_output_dir = create_session_output_dir(
                    self.outputs_dir, session["title"]
                )
                self.storage.update_session(
                    session["id"],
                    session_output_dir=session_output_dir,
                )
                session = self.storage.get_session(task["session_id"], user_id)

            runtime = self.session_manager.get_or_create(
                session_id=session["id"],
                user_id=user_id,
                session_output_dir=session_output_dir,
                uploaded_files=data_files,
                template_path=template_path,
            )
            self.storage.update_task(
                task_id,
                status="running",
                session_output_dir=session_output_dir,
                started_at=utcnow_iso(),
                error_message=None,
            )
            self.storage.update_session(session["id"], status="running")
            running_session_id = session["id"]

            log_path = os.path.join(session_output_dir, "task.log")
            # The runtime lock serializes tasks that share one session agent.
            with runtime.lock:
                with open(log_path, "a", encoding="utf-8") as log_file:
                    log_file.write(
                        f"[{utcnow_iso()}] task started for session {session['id']}\n"
                    )
                    try:
                        # NOTE(review): redirect_stdout/redirect_stderr replace
                        # the process-wide sys.stdout/sys.stderr, so with more
                        # than one worker thread, output of concurrently
                        # running tasks can end up in each other's log.
                        with redirect_stdout(log_file), redirect_stderr(log_file):
                            result = runtime.agent.analyze(
                                user_input=task["query"],
                                files=data_files,
                                template_path=template_path,
                                session_output_dir=session_output_dir,
                                reset_context=not runtime.initialized,
                                keep_session_open=True,
                            )
                        runtime.initialized = True
                    except Exception as exc:
                        self._mark_failed(task_id, session["id"], exc)
                        finalized = True
                        log_file.write(f"[{utcnow_iso()}] task failed: {exc}\n")
                        return

                    report_file_path = self._persist_task_report(
                        task_id, session_output_dir, result.get("report_file_path")
                    )
                    self.storage.update_task(
                        task_id,
                        status="succeeded",
                        report_file_path=report_file_path,
                        finished_at=utcnow_iso(),
                        error_message=None,
                    )
                    finalized = True
                    self.storage.update_session(session["id"], status="open")
        except Exception as exc:
            # Failure outside the agent call: make it visible on the task
            # unless a terminal status was already stored, then propagate.
            if not finalized:
                self._mark_failed(task_id, running_session_id, exc)
            raise
        finally:
            with self._lock:
                self._submitted.discard(task_id)

    def _mark_failed(
        self, task_id: str, session_id: Optional[str], exc: BaseException
    ) -> None:
        """Store a failed status for the task and reopen its session.

        ``session_id`` is ``None`` when the session was never switched to
        "running"; its status is then left untouched.
        """
        self.storage.update_task(
            task_id,
            status="failed",
            error_message=str(exc),
            finished_at=utcnow_iso(),
            report_file_path=None,
        )
        if session_id is not None:
            self.storage.update_session(session_id, status="open")

    def _resolve_template_path(self, task: dict, user_id: str) -> Optional[str]:
        """Return the stored path of the task's template file, if any.

        Returns ``None`` when the task has no ``template_file_id`` or when
        the referenced upload cannot be found for ``user_id``.
        """
        template_file_id = task.get("template_file_id")
        if not template_file_id:
            return None
        file_record = self.storage.get_uploaded_file(template_file_id, user_id)
        return file_record["stored_path"] if file_record else None

    @staticmethod
    def _persist_task_report(
        task_id: str, session_output_dir: str, current_report_path: Optional[str]
    ) -> Optional[str]:
        """Copy the report to a task-specific file inside the session dir.

        Args:
            task_id: Task id used to build the ``report_<task_id>.md`` name.
            session_output_dir: Directory that receives the copy.
            current_report_path: Path reported by the agent, possibly empty.

        Returns:
            The task-specific report path, or ``current_report_path``
            unchanged when it is empty or does not exist on disk.
        """
        if not current_report_path or not os.path.exists(current_report_path):
            return current_report_path
        task_report_path = os.path.join(session_output_dir, f"report_{task_id}.md")
        # Skip the copy when the agent already wrote to the target path.
        if os.path.abspath(current_report_path) != os.path.abspath(task_report_path):
            shutil.copyfile(current_report_path, task_report_path)
        return task_report_path