Add web session analysis platform with follow-up topics
This commit is contained in:
242
webapp/api.py
Normal file
242
webapp/api.py
Normal file
@@ -0,0 +1,242 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
FastAPI application for the data analysis platform.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
|
||||
from fastapi.responses import FileResponse, HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from webapp.session_manager import SessionManager
|
||||
from webapp.storage import Storage, utcnow_iso
|
||||
from webapp.task_runner import TaskRunner
|
||||
|
||||
|
||||
# Filesystem layout. BASE_DIR is the parent directory of this package.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")

# makedirs creates intermediate directories, so creating UPLOADS_DIR also
# creates RUNTIME_DIR (the directory that holds DB_PATH).
os.makedirs(UPLOADS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)

# Module-level singletons shared by every request handler below.
storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)
task_runner = TaskRunner(
    storage=storage,
    session_manager=session_manager,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    max_workers=2,
)
|
||||
|
||||
# Front-end assets live in the "static" directory next to this module and are
# served under the /static URL prefix.
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")

app = FastAPI(title="Data Analysis Platform API")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
|
||||
class CreateSessionRequest(BaseModel):
    """Request body accepted by ``POST /sessions``."""

    # Owner of the session; must be a non-empty string.
    user_id: str = Field(..., min_length=1)
    # Non-empty session title.
    title: str = Field(..., min_length=1)
    # Non-empty query used for the session's first task.
    query: str = Field(..., min_length=1)
    # IDs of previously uploaded files attached to the session.
    file_ids: List[str]
    # Optional template file ID; None when no template is used.
    template_file_id: Optional[str] = None
|
||||
|
||||
|
||||
class CreateTopicRequest(BaseModel):
    """Request body accepted by ``POST /sessions/{session_id}/topics``."""

    # User issuing the follow-up; must be a non-empty string.
    user_id: str = Field(..., min_length=1)
    # Non-empty follow-up query to run inside the existing session.
    query: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
def sanitize_filename(filename: str, max_length: int = 128) -> str:
    """Return a filesystem-safe version of a client-supplied file name.

    Every run of characters outside ``A-Z a-z 0-9 . _ -`` is collapsed into a
    single underscore, and leading/trailing dots and underscores are removed.
    Path separators are replaced as well, so the result is always a single
    path component.

    Args:
        filename: Name as supplied by the client (untrusted).
        max_length: Maximum length of the returned name. Stored uploads are
            prefixed with a UUID, so the name is capped well below common
            255-byte file name limits. The extension is preserved when the
            name has to be shortened.

    Returns:
        The sanitized name, or ``"upload.bin"`` when nothing usable remains.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", filename).strip("._")

    if len(cleaned) > max_length:
        stem, ext = os.path.splitext(cleaned)
        if len(ext) < max_length:
            # Shorten the stem only, so the extension survives.
            cleaned = stem[: max_length - len(ext)] + ext
        else:
            # The "extension" alone is too long; fall back to a plain cut.
            cleaned = cleaned[:max_length]
        # Truncation may expose a trailing separator character again.
        cleaned = cleaned.rstrip("._")

    return cleaned or "upload.bin"
|
||||
|
||||
|
||||
def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Return the session looked up for this user, or abort with HTTP 404.

    The lookup is delegated to ``storage.get_session(session_id, user_id)``;
    any falsy result is reported as "Session not found".

    Raises:
        HTTPException: 404 when storage returns no session.
    """
    record = storage.get_session(session_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
|
||||
def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Return the task looked up for this user, or abort with HTTP 404.

    The lookup is delegated to ``storage.get_task(task_id, user_id)``; any
    falsy result is reported as "Task not found".

    Raises:
        HTTPException: 404 when storage returns no task.
    """
    record = storage.get_task(task_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Task not found")
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    """Liveness probe: always answers with a static OK payload."""
    return dict(status="ok")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve ``index.html`` from STATIC_DIR as the landing page."""
    page_path = os.path.join(STATIC_DIR, "index.html")
    with open(page_path, "r", encoding="utf-8") as page:
        html = page.read()
    return HTMLResponse(html)
|
||||
|
||||
|
||||
@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Store uploaded files under the user's upload directory.

    Each file is streamed to ``<UPLOADS_DIR>/<user_id>/<uuid4>_<safe name>``
    in 1 MiB chunks and then registered through
    ``storage.create_uploaded_file``.

    Args:
        user_id: Form field identifying the uploader (untrusted input).
        files: One or more multipart file parts.

    Returns:
        ``{"files": [...]}`` with one entry per upload, each being whatever
        ``storage.create_uploaded_file`` returned.

    Raises:
        HTTPException: 400 when ``user_id`` does not resolve to a directory
            strictly inside UPLOADS_DIR.
    """
    # user_id comes straight from the client. Resolve the target directory and
    # refuse anything that escapes the uploads root ("..", absolute paths) or
    # that collapses onto the root itself (empty string, ".").
    uploads_root = os.path.realpath(UPLOADS_DIR)
    user_dir = os.path.realpath(os.path.join(uploads_root, user_id))
    if not user_dir.startswith(uploads_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid user_id")
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The random prefix keeps same-named uploads from overwriting each other.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        try:
            with open(stored_path, "wb") as f:
                while True:
                    chunk = await upload.read(1024 * 1024)
                    if not chunk:
                        break
                    f.write(chunk)
        except Exception:
            # Do not leave a truncated file behind when the transfer fails.
            try:
                os.remove(stored_path)
            except OSError:
                pass
            raise
        saved.append(
            storage.create_uploaded_file(user_id, upload.filename or safe_name, stored_path)
        )

    return {"files": saved}
|
||||
|
||||
|
||||
@app.get("/files")
def list_files(user_id: str = Query(...)):
    """Return ``storage.list_all_uploaded_files(user_id)`` under the "files" key."""
    uploaded = storage.list_all_uploaded_files(user_id)
    return {"files": uploaded}
|
||||
|
||||
|
||||
@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create a session plus its first task and submit the task for execution.

    Returns:
        ``{"session": ..., "task": ...}`` with the records returned by storage.

    Raises:
        HTTPException: 400 when ``storage.list_uploaded_files`` returns
            nothing for the requested file IDs and user.
    """
    owner = request.user_id

    # NOTE(review): this only verifies that at least one requested file
    # resolves for the user; the full, unfiltered file_ids list (and the
    # unchecked template_file_id) is still passed on below. Confirm that
    # downstream code re-validates ownership.
    matching_files = storage.list_uploaded_files(request.file_ids, owner)
    if not matching_files:
        raise HTTPException(status_code=400, detail="No valid files found for session")

    session = storage.create_session(
        user_id=owner,
        title=request.title,
        uploaded_file_ids=request.file_ids,
        template_file_id=request.template_file_id,
    )
    task = storage.create_task(
        session_id=session["id"],
        user_id=owner,
        query=request.query,
        uploaded_file_ids=request.file_ids,
        template_file_id=request.template_file_id,
    )
    task_runner.submit(task["id"], owner)
    return {"session": session, "task": task}
|
||||
|
||||
|
||||
@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """Return ``storage.list_sessions(user_id)`` under the "sessions" key."""
    sessions = storage.list_sessions(user_id)
    return {"sessions": sessions}
|
||||
|
||||
|
||||
@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks.

    Raises:
        HTTPException: 404 (via ``ensure_session_access``) when the session
            is not found for this user.
    """
    session_record = ensure_session_access(session_id, user_id)
    return {
        "session": session_record,
        "tasks": storage.list_session_tasks(session_id, user_id),
    }
|
||||
|
||||
|
||||
@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Add a follow-up task to an existing, still-open session.

    The new task reuses the session's ``uploaded_file_ids`` and
    ``template_file_id`` and is submitted to the task runner immediately.

    Returns:
        ``{"session": ..., "task": ...}``.

    Raises:
        HTTPException: 404 when the session is not found for the user,
            400 when the session status is "closed".
    """
    owner = request.user_id
    session = ensure_session_access(session_id, owner)
    if session["status"] == "closed":
        raise HTTPException(status_code=400, detail="Session is closed")

    task_fields = {
        "session_id": session_id,
        "user_id": owner,
        "query": request.query,
        "uploaded_file_ids": session["uploaded_file_ids"],
        "template_file_id": session.get("template_file_id"),
    }
    task = storage.create_task(**task_fields)
    task_runner.submit(task["id"], owner)
    return {"session": session, "task": task}
|
||||
|
||||
|
||||
@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session as closed and close it in the session manager.

    Closing an already closed session keeps its original ``closed_at``
    timestamp instead of overwriting it; ``session_manager.close`` is still
    called on every request, as before.

    Returns:
        ``{"session": ...}`` with the session re-read from storage.

    Raises:
        HTTPException: 404 (via ``ensure_session_access``) when the session
            is not found for this user.
    """
    session = ensure_session_access(session_id, user_id)
    if session["status"] != "closed":
        storage.update_session(session_id, status="closed", closed_at=utcnow_iso())
    session_manager.close(session_id)
    return {"session": storage.get_session(session_id, user_id)}
|
||||
|
||||
|
||||
@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """Return ``storage.list_tasks(user_id)`` under the "tasks" key."""
    tasks = storage.list_tasks(user_id)
    return {"tasks": tasks}
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task record.

    Raises:
        HTTPException: 404 (via ``ensure_task_access``) when the task is not
            found for this user.
    """
    return {"task": ensure_task_access(task_id, user_id)}
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Send the task's report file as a ``text/markdown`` download.

    Raises:
        HTTPException: 404 when the task is not found for the user, when it
            has no ``report_file_path``, or when that path does not exist.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")

    report_available = bool(report_path) and os.path.exists(report_path)
    if not report_available:
        raise HTTPException(status_code=404, detail="Report not available")

    download_name = os.path.basename(report_path)
    return FileResponse(report_path, media_type="text/markdown", filename=download_name)
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return the task's report text and file name as JSON.

    The report is read as UTF-8.

    Raises:
        HTTPException: 404 when the task is not found for the user, when it
            has no ``report_file_path``, or when that path does not exist.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")

    report_available = bool(report_path) and os.path.exists(report_path)
    if not report_available:
        raise HTTPException(status_code=404, detail="Report not available")

    with open(report_path, "r", encoding="utf-8") as report:
        text = report.read()
    return {"content": text, "filename": os.path.basename(report_path)}
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in the task's ``session_output_dir``.

    Returns:
        ``{"artifacts": [...]}`` sorted by file name. Each entry carries
        ``name``, ``size`` (bytes), ``is_image`` (decided by file extension)
        and a download ``url``. The list is empty when the task has no
        output directory or the directory does not exist.

    Raises:
        HTTPException: 404 (via ``ensure_task_access``) when the task is not
            found for this user.
    """
    from urllib.parse import quote

    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
    # File names and IDs may contain spaces, '#', '?', '%' and similar
    # characters, so every URL component is percent-encoded.
    url_prefix = f"/tasks/{quote(task_id, safe='')}/artifacts"
    user_query = quote(user_id, safe="")

    artifacts = []
    with os.scandir(session_output_dir) as entries:
        for entry in sorted(entries, key=lambda item: item.name):
            try:
                if not entry.is_file():
                    continue
                size = entry.stat().st_size
            except OSError:
                # The file vanished or became unreadable while listing; skip it.
                continue
            artifacts.append(
                {
                    "name": entry.name,
                    "size": size,
                    "is_image": entry.name.lower().endswith(image_suffixes),
                    "url": f"{url_prefix}/{quote(entry.name, safe='')}?user_id={user_query}",
                }
            )
    return {"artifacts": artifacts}
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Download one artifact file from the task's ``session_output_dir``.

    Raises:
        HTTPException: 404 when the task is not found for the user, when it
            has no output directory, or when the artifact is not an existing
            regular file; 400 when the resolved path is not strictly inside
            the output directory.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(status_code=404, detail="Artifact directory not available")

    # Resolve symlinks and ".." before the containment check so the client
    # cannot reach files outside the task's output directory.
    session_root = os.path.realpath(session_output_dir)
    artifact_path = os.path.realpath(os.path.join(session_output_dir, artifact_name))
    # The root directory itself is not a downloadable artifact, so only paths
    # strictly below it are accepted.
    if not artifact_path.startswith(session_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid artifact path")
    # isfile (not exists): a sub-directory would otherwise reach FileResponse,
    # which can only send regular files.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))
|
||||
Reference in New Issue
Block a user