# -*- coding: utf-8 -*-
"""
FastAPI application for the data analysis platform.

Exposes HTTP endpoints for uploading files, creating analysis sessions and
follow-up topics, inspecting tasks, and downloading the reports and artifacts
that tasks produce.
"""

import os
import re
import uuid
from typing import List, Optional
from urllib.parse import quote

from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field

from webapp.session_manager import SessionManager
from webapp.storage import Storage, utcnow_iso
from webapp.task_runner import TaskRunner


BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")

# Creating UPLOADS_DIR also creates RUNTIME_DIR, its parent, which DB_PATH
# lives in.
os.makedirs(UPLOADS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)

storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)
task_runner = TaskRunner(
    storage=storage,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    session_manager=session_manager,
    max_workers=2,
)

app = FastAPI(title="Data Analysis Platform API")

STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")

# Upload bodies are copied to disk in chunks of this many bytes.
_UPLOAD_CHUNK_SIZE = 1024 * 1024

# File-name suffixes reported as images in artifact listings.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".gif", ".webp")


class CreateSessionRequest(BaseModel):
    """Request body for ``POST /sessions``."""

    # Owner of the new session; must be non-empty.
    user_id: str = Field(..., min_length=1)
    # Title stored on the session; must be non-empty.
    title: str = Field(..., min_length=1)
    # Query passed to the first task of the session; must be non-empty.
    query: str = Field(..., min_length=1)
    # Ids of previously uploaded files to attach to the session.
    file_ids: List[str]
    # Optional id of an uploaded template file.
    template_file_id: Optional[str] = None


class CreateTopicRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/topics``."""

    # User making the request; must be non-empty.
    user_id: str = Field(..., min_length=1)
    # Query for the follow-up task; must be non-empty.
    query: str = Field(..., min_length=1)


def sanitize_filename(filename: str) -> str:
    """Return *filename* reduced to a conservative, filesystem-safe form.

    Every run of characters outside ``A-Za-z0-9._-`` is replaced by a single
    underscore, then leading and trailing dots and underscores are stripped.
    If nothing is left, ``"upload.bin"`` is returned.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", filename).strip("._")
    return cleaned or "upload.bin"


def _resolve_within(root: str, name: str) -> Optional[str]:
    """Resolve ``root/name`` and return it only if it stays inside *root*.

    Both paths are passed through ``os.path.realpath`` so that ``..``
    components, absolute *name* values, and symlinks cannot escape *root*.

    Returns:
        The resolved path (which may equal the resolved root itself), or
        ``None`` when the path falls outside *root* or cannot be resolved.
    """
    try:
        resolved_root = os.path.realpath(root)
        resolved = os.path.realpath(os.path.join(root, name))
    except ValueError:
        # Raised for paths the OS cannot represent, e.g. an embedded NUL.
        return None
    if resolved == resolved_root or resolved.startswith(resolved_root + os.sep):
        return resolved
    return None


def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Return the session looked up by *session_id* and *user_id*.

    Raises:
        HTTPException: 404 when storage returns no session for that pair.
    """
    session = storage.get_session(session_id, user_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    return session


def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Return the task looked up by *task_id* and *user_id*.

    Raises:
        HTTPException: 404 when storage returns no task for that pair.
    """
    task = storage.get_task(task_id, user_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    return task


@app.get("/health")
def health():
    """Liveness probe; always reports ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/", response_class=HTMLResponse)
def index():
    """Serve ``index.html`` from the static directory."""
    index_path = os.path.join(STATIC_DIR, "index.html")
    with open(index_path, "r", encoding="utf-8") as f:
        return HTMLResponse(f.read())


@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Store uploaded files under the user's upload directory and record them.

    Each file is written to ``UPLOADS_DIR/<user_id>/<uuid4>_<sanitized name>``
    and registered through ``storage.create_uploaded_file`` with its original
    file name (or the sanitized fallback when the client sent none).

    Raises:
        HTTPException: 400 when *user_id* would place the upload directory
            outside ``UPLOADS_DIR``.
    """
    # user_id is client-supplied and becomes a path component, so refuse
    # values such as "../x" or "/etc" before touching the filesystem.
    if _resolve_within(UPLOADS_DIR, user_id) is None:
        raise HTTPException(status_code=400, detail="Invalid user_id")

    user_dir = os.path.join(UPLOADS_DIR, user_id)
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The uuid prefix keeps repeated uploads of the same name distinct.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        with open(stored_path, "wb") as f:
            while True:
                chunk = await upload.read(_UPLOAD_CHUNK_SIZE)
                if not chunk:
                    break
                f.write(chunk)
        saved.append(
            storage.create_uploaded_file(
                user_id, upload.filename or safe_name, stored_path
            )
        )
    return {"files": saved}


@app.get("/files")
def list_files(user_id: str = Query(...)):
    """List the uploaded files that storage reports for *user_id*."""
    return {"files": storage.list_all_uploaded_files(user_id)}


@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create a session plus its first task and submit the task to the runner.

    Raises:
        HTTPException: 400 when storage finds none of the requested files
            for the user.
    """
    # NOTE(review): this only rejects the request when the lookup comes back
    # empty; whether every id in file_ids (and template_file_id) is valid for
    # this user is not verified here -- confirm against Storage.
    if not storage.list_uploaded_files(request.file_ids, request.user_id):
        raise HTTPException(
            status_code=400, detail="No valid files found for session"
        )

    session = storage.create_session(
        user_id=request.user_id,
        title=request.title,
        uploaded_file_ids=request.file_ids,
        template_file_id=request.template_file_id,
    )
    task = storage.create_task(
        session_id=session["id"],
        user_id=request.user_id,
        query=request.query,
        uploaded_file_ids=request.file_ids,
        template_file_id=request.template_file_id,
    )
    task_runner.submit(task["id"], request.user_id)
    return {"session": session, "task": task}


@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """List the sessions that storage reports for *user_id*."""
    return {"sessions": storage.list_sessions(user_id)}


@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks.

    Raises:
        HTTPException: 404 when the session is not found for *user_id*.
    """
    session = ensure_session_access(session_id, user_id)
    tasks = storage.list_session_tasks(session_id, user_id)
    return {"session": session, "tasks": tasks}


@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Create and submit a follow-up task inside an existing session.

    The new task reuses the session's uploaded files and template file.

    Raises:
        HTTPException: 404 when the session is not found for the user,
            400 when the session status is ``"closed"``.
    """
    session = ensure_session_access(session_id, request.user_id)
    if session["status"] == "closed":
        raise HTTPException(status_code=400, detail="Session is closed")

    task = storage.create_task(
        session_id=session_id,
        user_id=request.user_id,
        query=request.query,
        uploaded_file_ids=session["uploaded_file_ids"],
        template_file_id=session.get("template_file_id"),
    )
    task_runner.submit(task["id"], request.user_id)
    return {"session": session, "task": task}


@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session closed and return its refreshed record.

    Raises:
        HTTPException: 404 when the session is not found for *user_id*.
    """
    # Called only for its access check; the stale record is not needed
    # because the session is re-read after the update below.
    ensure_session_access(session_id, user_id)
    storage.update_session(session_id, status="closed", closed_at=utcnow_iso())
    session_manager.close(session_id)
    return {"session": storage.get_session(session_id, user_id)}


@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """List the tasks that storage reports for *user_id*."""
    return {"tasks": storage.list_tasks(user_id)}


@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task.

    Raises:
        HTTPException: 404 when the task is not found for *user_id*.
    """
    task = ensure_task_access(task_id, user_id)
    return {"task": task}


@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Download the task's report file as ``text/markdown``.

    Raises:
        HTTPException: 404 when the task is not found, or when it has no
            report path or that path is not an existing regular file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    # isfile rather than exists: a directory at this path cannot be served.
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    return FileResponse(
        report_path,
        media_type="text/markdown",
        filename=os.path.basename(report_path),
    )


@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return the task's report text (read as UTF-8) and its file name.

    Raises:
        HTTPException: 404 when the task is not found, or when it has no
            report path or that path is not an existing regular file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    with open(report_path, "r", encoding="utf-8") as f:
        return {"content": f.read(), "filename": os.path.basename(report_path)}


@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in the task's session output directory.

    Entries are sorted by name; each carries its size, an image flag based
    on the file suffix, and a download URL. An empty list is returned when
    the task has no output directory or it does not exist.

    Raises:
        HTTPException: 404 when the task is not found for *user_id*.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    # Percent-encode every interpolated URL component so that names with
    # spaces, '#', '?' or '&' still produce a working link.
    task_part = quote(task_id, safe="")
    user_part = quote(user_id, safe="")

    artifacts = []
    for name in sorted(os.listdir(session_output_dir)):
        path = os.path.join(session_output_dir, name)
        if not os.path.isfile(path):
            continue
        artifacts.append(
            {
                "name": name,
                "size": os.path.getsize(path),
                "is_image": name.lower().endswith(_IMAGE_SUFFIXES),
                "url": (
                    f"/tasks/{task_part}/artifacts/{quote(name, safe='')}"
                    f"?user_id={user_part}"
                ),
            }
        )
    return {"artifacts": artifacts}


@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Download one artifact from the task's session output directory.

    Raises:
        HTTPException: 404 when the task is not found, the task has no
            output directory, or the artifact is not an existing regular
            file; 400 when *artifact_name* resolves outside the output
            directory.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(
            status_code=404, detail="Artifact directory not available"
        )

    artifact_path = _resolve_within(session_output_dir, artifact_name)
    if artifact_path is None:
        raise HTTPException(status_code=400, detail="Invalid artifact path")
    # isfile rather than exists: "." or a subdirectory passes the containment
    # check but cannot be sent as a file.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))