243 lines
8.3 KiB
Python
243 lines
8.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
FastAPI application for the data analysis platform.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import uuid
|
|
from typing import List, Optional
|
|
|
|
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
|
|
from fastapi.responses import FileResponse, HTMLResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel, Field
|
|
|
|
from webapp.session_manager import SessionManager
|
|
from webapp.storage import Storage, utcnow_iso
|
|
from webapp.task_runner import TaskRunner
|
|
|
|
|
|
# Filesystem layout, all derived from the parent of this module's directory.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")

# Create the upload and output directories at import time.  Creating
# UPLOADS_DIR also creates RUNTIME_DIR, since it is a parent of it.
for _required_dir in (UPLOADS_DIR, OUTPUTS_DIR):
    os.makedirs(_required_dir, exist_ok=True)
|
|
|
|
# Module-level service singletons shared by every request handler below.
storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)

# The runner is handed the storage, both directories and the session manager.
_task_runner_options = dict(
    storage=storage,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    session_manager=session_manager,
    max_workers=2,
)
task_runner = TaskRunner(**_task_runner_options)
|
|
|
|
# ASGI application object; the route decorators below register on it.
app = FastAPI(title="Data Analysis Platform API")

# Files from the "static" directory next to this module are served at /static.
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
_static_files = StaticFiles(directory=STATIC_DIR)
app.mount("/static", _static_files, name="static")
|
|
|
|
|
|
class CreateSessionRequest(BaseModel):
    """Request body for ``POST /sessions``."""

    # The three text fields are required and must be non-empty.
    user_id: str = Field(..., min_length=1)
    title: str = Field(..., min_length=1)
    query: str = Field(..., min_length=1)
    # Required list of uploaded-file ids (no length constraint is declared).
    file_ids: List[str]
    # Optional template file id; absent means None.
    template_file_id: Optional[str] = Field(default=None)
|
|
|
|
|
|
class CreateTopicRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/topics``."""

    # Both fields are required and must be non-empty strings.
    user_id: str = Field(..., min_length=1)
    query: str = Field(..., min_length=1)
|
|
|
|
|
|
def sanitize_filename(filename: str) -> str:
    """Return a restricted-charset variant of *filename*.

    Each run of characters outside ``A-Z``, ``a-z``, ``0-9``, ``.``, ``_``
    and ``-`` is collapsed into a single underscore, after which leading and
    trailing dots and underscores are removed.  When nothing remains, the
    fallback name ``"upload.bin"`` is returned.
    """
    disallowed_run = re.compile(r"[^A-Za-z0-9._-]+")
    candidate = disallowed_run.sub("_", filename)
    candidate = candidate.strip("._")
    if candidate:
        return candidate
    return "upload.bin"
|
|
|
|
|
|
def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Fetch a session via ``storage.get_session`` or fail with HTTP 404.

    Args:
        session_id: Identifier of the session to look up.
        user_id: Passed to the storage lookup together with the session id.

    Returns:
        Whatever ``storage.get_session`` returned, when it is truthy.

    Raises:
        HTTPException: 404 "Session not found" when the lookup result is falsy.
    """
    record = storage.get_session(session_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Session not found")
|
|
|
|
|
|
def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Fetch a task via ``storage.get_task`` or fail with HTTP 404.

    Args:
        task_id: Identifier of the task to look up.
        user_id: Passed to the storage lookup together with the task id.

    Returns:
        Whatever ``storage.get_task`` returned, when it is truthy.

    Raises:
        HTTPException: 404 "Task not found" when the lookup result is falsy.
    """
    record = storage.get_task(task_id, user_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Task not found")
|
|
|
|
|
|
@app.get("/health")
def health():
    """Health endpoint: unconditionally report status ``ok``."""
    payload = {"status": "ok"}
    return payload
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve ``index.html`` from the static directory as an HTML response."""
    page_path = os.path.join(STATIC_DIR, "index.html")
    with open(page_path, "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(markup)
|
|
|
|
|
|
@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Store uploaded files under the user's upload directory and record them.

    Each file is streamed to ``UPLOADS_DIR/<user_id>/<uuid4>_<sanitized name>``
    in 1 MiB chunks and then registered with ``storage.create_uploaded_file``.

    Args:
        user_id: Form field naming the owner; used as a directory name.
        files: One or more uploaded files.

    Returns:
        ``{"files": [...]}`` with one ``storage.create_uploaded_file`` result
        per upload, in request order.

    Raises:
        HTTPException: 400 when ``user_id`` is not a single plain path
            component (empty, ``.``, ``..``, contains a path separator or a
            NUL byte).
    """
    # user_id comes straight from the client and becomes part of a filesystem
    # path.  Without this guard, values such as "../x" or "/tmp" would make
    # os.path.join() point outside UPLOADS_DIR.
    path_separators = [sep for sep in (os.sep, os.altsep) if sep]
    if (
        not user_id
        or user_id in (os.curdir, os.pardir)
        or "\x00" in user_id
        or any(sep in user_id for sep in path_separators)
    ):
        raise HTTPException(status_code=400, detail="Invalid user_id")

    user_dir = os.path.join(UPLOADS_DIR, user_id)
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The uuid prefix keeps repeated uploads of the same name apart.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        with open(stored_path, "wb") as f:
            # Copy in 1 MiB chunks so large uploads are not read in one go.
            while True:
                chunk = await upload.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
        # The original (unsanitized) name is what gets recorded, falling back
        # to the sanitized one when the client sent no filename.
        saved.append(
            storage.create_uploaded_file(user_id, upload.filename or safe_name, stored_path)
        )

    return {"files": saved}
|
|
|
|
|
|
@app.get("/files")
def list_files(user_id: str = Query(...)):
    """Return ``storage.list_all_uploaded_files(user_id)`` under ``"files"``."""
    uploaded = storage.list_all_uploaded_files(user_id)
    return {"files": uploaded}
|
|
|
|
|
|
@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create a session plus its first task and submit the task for execution.

    Raises:
        HTTPException: 400 when ``storage.list_uploaded_files`` returns a
            falsy result for the requested file ids and user.

    Returns:
        ``{"session": ..., "task": ...}`` holding the two created records.
    """
    owner = request.user_id
    file_ids = request.file_ids
    template_id = request.template_file_id

    matching_files = storage.list_uploaded_files(file_ids, owner)
    if not matching_files:
        raise HTTPException(status_code=400, detail="No valid files found for session")

    new_session = storage.create_session(
        user_id=owner,
        title=request.title,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    first_task = storage.create_task(
        session_id=new_session["id"],
        user_id=owner,
        query=request.query,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    task_runner.submit(first_task["id"], owner)
    return {"session": new_session, "task": first_task}
|
|
|
|
|
|
@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """Return ``storage.list_sessions(user_id)`` under ``"sessions"``."""
    user_sessions = storage.list_sessions(user_id)
    return {"sessions": user_sessions}
|
|
|
|
|
|
@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks.

    The session lookup goes through ``ensure_session_access``, which raises
    HTTP 404 when the storage lookup comes back falsy.
    """
    found_session = ensure_session_access(session_id, user_id)
    session_tasks = storage.list_session_tasks(session_id, user_id)
    return {"session": found_session, "tasks": session_tasks}
|
|
|
|
|
|
@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Add a follow-up task to an existing session and submit it.

    The new task reuses the session's ``uploaded_file_ids`` and its
    ``template_file_id`` (if any).

    Raises:
        HTTPException: 404 from ``ensure_session_access`` when the session
            lookup is falsy; 400 when the session status is ``"closed"``.

    Returns:
        ``{"session": ..., "task": ...}`` with the session and the new task.
    """
    owner = request.user_id
    parent = ensure_session_access(session_id, owner)
    is_closed = parent["status"] == "closed"
    if is_closed:
        raise HTTPException(status_code=400, detail="Session is closed")

    followup = storage.create_task(
        session_id=session_id,
        user_id=owner,
        query=request.query,
        uploaded_file_ids=parent["uploaded_file_ids"],
        template_file_id=parent.get("template_file_id"),
    )
    task_runner.submit(followup["id"], owner)
    return {"session": parent, "task": followup}
|
|
|
|
|
|
@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session as closed and return the re-read session record.

    Raises:
        HTTPException: 404 from ``ensure_session_access`` when the session
            lookup is falsy.
    """
    # Called only for its 404 side effect; the returned record is not needed.
    ensure_session_access(session_id, user_id)

    closed_timestamp = utcnow_iso()
    storage.update_session(session_id, status="closed", closed_at=closed_timestamp)
    session_manager.close(session_id)

    # Read the session again so the response reflects the update.
    refreshed = storage.get_session(session_id, user_id)
    return {"session": refreshed}
|
|
|
|
|
|
@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """Return ``storage.list_tasks(user_id)`` under ``"tasks"``."""
    user_tasks = storage.list_tasks(user_id)
    return {"tasks": user_tasks}
|
|
|
|
|
|
@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task; ``ensure_task_access`` raises 404 if not found."""
    found_task = ensure_task_access(task_id, user_id)
    return {"task": found_task}
|
|
|
|
|
|
@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Send a task's report file as a ``text/markdown`` download.

    Raises:
        HTTPException: 404 from ``ensure_task_access`` when the task lookup
            is falsy, or 404 "Report not available" when the task has no
            ``report_file_path`` or that path is not an existing regular file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    # isfile() rather than exists(): a directory at this path must yield a
    # clean 404 instead of being handed to FileResponse, which only serves
    # regular files.
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    return FileResponse(
        report_path,
        media_type="text/markdown",
        filename=os.path.basename(report_path),
    )
|
|
|
|
|
|
@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return a task's report text inline as JSON.

    Returns:
        ``{"content": <file text read as UTF-8>, "filename": <base name>}``.

    Raises:
        HTTPException: 404 from ``ensure_task_access`` when the task lookup
            is falsy, or 404 "Report not available" when the task has no
            ``report_file_path`` or that path is not an existing regular file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    # isfile() rather than exists(): open() on a directory would raise and
    # surface as a 500 instead of the intended 404.
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    with open(report_path, "r", encoding="utf-8") as f:
        content = f.read()
    return {"content": content, "filename": os.path.basename(report_path)}
|
|
|
|
|
|
@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in a task's ``session_output_dir``.

    Returns:
        ``{"artifacts": [...]}`` sorted by file name.  Each entry carries
        ``name``, ``size`` (bytes), ``is_image`` (decided by file extension)
        and ``url`` (a link to the artifact download route).  The list is
        empty when the task has no output directory or it does not exist.

    Raises:
        HTTPException: 404 from ``ensure_task_access`` when the task lookup
            is falsy.
    """
    # Local import: the file-level import block does not provide urllib.
    from urllib.parse import quote

    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
    # Percent-encode every dynamic URL component.  File names may contain
    # spaces, '#', '?' or '%', and user_id may contain '&' or '='; any of
    # these would otherwise produce a link that resolves to the wrong
    # resource.  Plain alphanumeric ids and names are left unchanged.
    task_part = quote(task_id, safe="")
    user_part = quote(user_id, safe="")

    artifacts = []
    for name in sorted(os.listdir(session_output_dir)):
        path = os.path.join(session_output_dir, name)
        if not os.path.isfile(path):
            # Sub-directories and other non-regular entries are not listed.
            continue
        artifacts.append(
            {
                "name": name,
                "size": os.path.getsize(path),
                "is_image": name.lower().endswith(image_suffixes),
                "url": f"/tasks/{task_part}/artifacts/{quote(name, safe='')}?user_id={user_part}",
            }
        )
    return {"artifacts": artifacts}
|
|
|
|
|
|
@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Send one artifact file from a task's ``session_output_dir``.

    Raises:
        HTTPException: 404 from ``ensure_task_access`` when the task lookup
            is falsy; 404 when the task has no ``session_output_dir``; 400
            when the resolved artifact path falls outside that directory;
            404 when the resolved path is not an existing regular file.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(status_code=404, detail="Artifact directory not available")

    # Resolve symlinks and ".." on both sides before comparing, so the
    # containment check is made on the real filesystem locations.
    artifact_path = os.path.realpath(os.path.join(session_output_dir, artifact_name))
    session_root = os.path.realpath(session_output_dir)
    if artifact_path != session_root and not artifact_path.startswith(session_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid artifact path")

    # isfile() rather than exists(): the check above lets the output
    # directory itself through, and sub-directories also "exist".  Neither
    # can be served by FileResponse, so both are reported as not found.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))
|