Files
vibe_data_ana/webapp/api.py

243 lines
8.3 KiB
Python

# -*- coding: utf-8 -*-
"""
FastAPI application for the data analysis platform.
"""
import os
import re
import uuid
from typing import List, Optional
from urllib.parse import quote

from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field

from webapp.session_manager import SessionManager
from webapp.storage import Storage, utcnow_iso
from webapp.task_runner import TaskRunner
# Filesystem layout. BASE_DIR is the parent of the directory containing this
# module; runtime data and outputs are kept underneath it.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")
# Directories are created at import time. Creating UPLOADS_DIR also creates
# RUNTIME_DIR, the directory DB_PATH points into.
os.makedirs(UPLOADS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)
# Module-level singletons used by the request handlers in this module.
storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)
task_runner = TaskRunner(
    storage=storage,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    session_manager=session_manager,
    max_workers=2,
)
app = FastAPI(title="Data Analysis Platform API")
# Static assets live in the "static" directory next to this module and are
# served under /static; the index route reads index.html from the same place.
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
class CreateSessionRequest(BaseModel):
    """Request body accepted by ``POST /sessions``."""

    # Owner of the new session; must be a non-empty string.
    user_id: str = Field(..., min_length=1)
    # Title stored on the session; must be non-empty.
    title: str = Field(..., min_length=1)
    # Query text for the session's first task; must be non-empty.
    query: str = Field(..., min_length=1)
    # IDs of previously uploaded files to attach to the session and first task.
    file_ids: List[str]
    # Optional ID of a template file, passed through to the session and task.
    template_file_id: Optional[str] = None
class CreateTopicRequest(BaseModel):
    """Request body accepted by ``POST /sessions/{session_id}/topics``."""

    # User the session lookup is performed for; must be non-empty.
    user_id: str = Field(..., min_length=1)
    # Query text for the follow-up task; must be non-empty.
    query: str = Field(..., min_length=1)
def sanitize_filename(filename: str) -> str:
    """Return a filesystem-safe version of a client-supplied file name.

    Every run of characters outside ``A-Z a-z 0-9 . _ -`` is replaced by a
    single underscore, then leading and trailing dots/underscores are removed.

    If the part before the extension consisted only of replaced characters
    (for example a non-ASCII name such as ``"数据.csv"``), plain stripping would
    also remove the dot and leave just ``"csv"``. In that case the placeholder
    stem ``"upload"`` is used so the extension survives (``"upload.csv"``).

    Args:
        filename: The original, untrusted file name.

    Returns:
        The sanitized name, or ``"upload.bin"`` when nothing usable remains.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", filename)
    safe = cleaned.strip("._")
    if not safe:
        return "upload.bin"

    # Characters the strip above removed from the front of the name.
    dropped_prefix = cleaned[: len(cleaned) - len(cleaned.lstrip("._"))]
    # An underscore followed directly by a dot means the whole stem was
    # replaced and the dot introduced the extension. A prefix made only of
    # dots is an ordinary dotfile and keeps the plain stripped name.
    stem_was_replaced = "_" in dropped_prefix and dropped_prefix.endswith(".")
    if stem_was_replaced and "." not in safe:
        return f"upload.{safe}"
    return safe
def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Return the session looked up for ``session_id`` and ``user_id``.

    Raises:
        HTTPException: 404 when ``storage.get_session`` returns nothing truthy.
    """
    record = storage.get_session(session_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Session not found")
def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Return the task looked up for ``task_id`` and ``user_id``.

    Raises:
        HTTPException: 404 when ``storage.get_task`` returns nothing truthy.
    """
    record = storage.get_task(task_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Task not found")
@app.get("/health")
def health():
    """Liveness probe: always answers with a fixed ``ok`` status payload."""
    payload = {"status": "ok"}
    return payload
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve ``index.html`` from the static directory as the landing page."""
    page_path = os.path.join(STATIC_DIR, "index.html")
    # The file is read from disk on every request.
    with open(page_path, "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(markup)
@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Store uploaded files in the user's upload directory and register them.

    Each file is written in 1 MiB chunks to
    ``UPLOADS_DIR/<user_id>/<uuid4>_<sanitized name>`` and then recorded via
    ``storage.create_uploaded_file`` under its original file name.

    Args:
        user_id: Form field naming the owner of the files.
        files: One or more multipart file parts.

    Returns:
        ``{"files": [...]}`` holding what ``storage.create_uploaded_file``
        returned for each file, in upload order.

    Raises:
        HTTPException: 400 when ``user_id`` does not resolve to a directory
            strictly inside ``UPLOADS_DIR``.
    """
    user_dir = os.path.join(UPLOADS_DIR, user_id)
    # user_id is client-supplied form data. Without this check a value such as
    # "../.." or an absolute path would make the handler create directories
    # and write files outside the uploads root.
    uploads_root = os.path.realpath(UPLOADS_DIR)
    if not os.path.realpath(user_dir).startswith(uploads_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid user_id")
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The random prefix keeps same-named uploads from overwriting each other.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        with open(stored_path, "wb") as f:
            # Copy in bounded chunks so a large upload is never held in memory.
            while True:
                chunk = await upload.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
        saved.append(
            storage.create_uploaded_file(user_id, upload.filename or safe_name, stored_path)
        )
    return {"files": saved}
@app.get("/files")
def list_files(user_id: str = Query(...)):
    """Return what ``storage.list_all_uploaded_files`` yields for ``user_id``."""
    uploaded = storage.list_all_uploaded_files(user_id)
    return {"files": uploaded}
@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create a session, create its first task, and submit that task.

    Returns:
        ``{"session": ..., "task": ...}`` with the records returned by storage.

    Raises:
        HTTPException: 400 when the file lookup for the requested IDs and user
            comes back empty.
    """
    owner = request.user_id
    file_ids = request.file_ids
    template_id = request.template_file_id

    # NOTE(review): this only rejects the request when the lookup returns
    # nothing at all; the full requested ID list is stored below regardless.
    # Confirm against storage.list_uploaded_files whether partially valid
    # lists are meant to be accepted.
    matching_files = storage.list_uploaded_files(file_ids, owner)
    if not matching_files:
        raise HTTPException(status_code=400, detail="No valid files found for session")

    session = storage.create_session(
        user_id=owner,
        title=request.title,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    task = storage.create_task(
        session_id=session["id"],
        user_id=owner,
        query=request.query,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    task_runner.submit(task["id"], owner)
    return {"session": session, "task": task}
@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """Return what ``storage.list_sessions`` yields for ``user_id``."""
    sessions = storage.list_sessions(user_id)
    return {"sessions": sessions}
@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks.

    Raises:
        HTTPException: 404 (from ``ensure_session_access``) when the session
            lookup for this user finds nothing.
    """
    # The access check runs first so tasks are only listed for a found session.
    found = ensure_session_access(session_id, user_id)
    return {
        "session": found,
        "tasks": storage.list_session_tasks(session_id, user_id),
    }
@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Create and submit a follow-up task inside an existing session.

    The new task reuses the session's uploaded file IDs and template file ID.

    Returns:
        ``{"session": ..., "task": ...}`` where the session is the record
        fetched before the task was created.

    Raises:
        HTTPException: 404 when the session lookup finds nothing; 400 when the
            session's status is ``"closed"``.
    """
    owner = request.user_id
    session = ensure_session_access(session_id, owner)
    is_closed = session["status"] == "closed"
    if is_closed:
        raise HTTPException(status_code=400, detail="Session is closed")

    task_fields = {
        "session_id": session_id,
        "user_id": owner,
        "query": request.query,
        "uploaded_file_ids": session["uploaded_file_ids"],
        "template_file_id": session.get("template_file_id"),
    }
    task = storage.create_task(**task_fields)
    task_runner.submit(task["id"], owner)
    return {"session": session, "task": task}
@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session as closed and return the re-read session record.

    Raises:
        HTTPException: 404 when the session lookup for this user finds nothing.
    """
    # Called only for its 404 side effect; the returned record is not needed.
    ensure_session_access(session_id, user_id)
    closed_timestamp = utcnow_iso()
    storage.update_session(session_id, status="closed", closed_at=closed_timestamp)
    session_manager.close(session_id)
    # Read the session again so the response carries the updated fields.
    refreshed = storage.get_session(session_id, user_id)
    return {"session": refreshed}
@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """Return what ``storage.list_tasks`` yields for ``user_id``."""
    tasks = storage.list_tasks(user_id)
    return {"tasks": tasks}
@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task record.

    Raises:
        HTTPException: 404 when the task lookup for this user finds nothing.
    """
    return {"task": ensure_task_access(task_id, user_id)}
@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Send a task's report file as a ``text/markdown`` download.

    Raises:
        HTTPException: 404 when the task is not found, has no
            ``report_file_path``, or that path does not exist on disk.
    """
    report_path = ensure_task_access(task_id, user_id).get("report_file_path")
    report_ready = bool(report_path) and os.path.exists(report_path)
    if not report_ready:
        raise HTTPException(status_code=404, detail="Report not available")
    download_name = os.path.basename(report_path)
    return FileResponse(report_path, media_type="text/markdown", filename=download_name)
@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return a task's report text and file name as JSON.

    Returns:
        ``{"content": <file text decoded as UTF-8>, "filename": <base name>}``.

    Raises:
        HTTPException: 404 when the task is not found, has no
            ``report_file_path``, or that path does not exist on disk.
    """
    report_path = ensure_task_access(task_id, user_id).get("report_file_path")
    report_ready = bool(report_path) and os.path.exists(report_path)
    if not report_ready:
        raise HTTPException(status_code=404, detail="Report not available")
    with open(report_path, "r", encoding="utf-8") as report_file:
        text = report_file.read()
    return {"content": text, "filename": os.path.basename(report_path)}
@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in a task's output directory.

    Returns:
        ``{"artifacts": [...]}`` sorted by file name. Each entry carries
        ``name``, ``size`` (bytes), ``is_image`` (decided by file extension)
        and ``url`` (the download link for that artifact). The list is empty
        when the task has no output directory or it does not exist.

    Raises:
        HTTPException: 404 when the task lookup for this user finds nothing.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
    # Percent-encode every interpolated value. File names may contain spaces,
    # "#", "?" or "%", and a user_id may contain "+" or "&"; left raw, those
    # characters change how the link is parsed and the download fails.
    url_prefix = f"/tasks/{quote(task_id, safe='')}/artifacts"
    url_query = f"user_id={quote(user_id, safe='')}"

    artifacts = []
    for name in sorted(os.listdir(session_output_dir)):
        path = os.path.join(session_output_dir, name)
        # Sub-directories and other non-file entries are not listed.
        if not os.path.isfile(path):
            continue
        artifacts.append(
            {
                "name": name,
                "size": os.path.getsize(path),
                "is_image": name.lower().endswith(image_suffixes),
                "url": f"{url_prefix}/{quote(name, safe='')}?{url_query}",
            }
        )
    return {"artifacts": artifacts}
@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Download one artifact file from a task's output directory.

    Raises:
        HTTPException: 404 when the task is not found, has no output
            directory, or the artifact is not an existing regular file;
            400 when ``artifact_name`` does not resolve to a path strictly
            inside the output directory.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(status_code=404, detail="Artifact directory not available")

    # Resolve symlinks and ".." on both sides before comparing, so the check
    # cannot be bypassed with a crafted artifact name.
    artifact_path = os.path.realpath(os.path.join(session_output_dir, artifact_name))
    session_root = os.path.realpath(session_output_dir)
    # The path must be strictly inside the directory: the directory itself
    # (e.g. artifact_name ".") is not a downloadable artifact.
    if not artifact_path.startswith(session_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid artifact path")
    # isfile rather than exists: a sub-directory exists but cannot be sent,
    # and FileResponse would fail while streaming instead of returning 404.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))