57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
from fastapi import FastAPI, File, UploadFile, HTTPException
|
|
from fastapi.responses import StreamingResponse, JSONResponse
|
|
import httpx
|
|
import tempfile
|
|
import shutil
|
|
import asyncio
|
|
|
|
# FastAPI application object; the /chat route is registered on it via @app.post.
app = FastAPI()
|
|
|
|
# Transcription endpoint: /chat uploads the received audio here as a multipart
# "file" field and reads the "text" key from the JSON reply.
WHISPER_URL = "http://whisper:8001/transcribe"
|
|
# Speech-synthesis endpoint: /chat posts {"text": ..., "language": "de"} here and
# relays the response body to the caller as audio/wav.
COQUITTS_URL = "http://coquitts:8002/speak"
|
|
# Completion endpoint: /chat posts {"model": ..., "prompt": ...} here and looks for
# the answer under the "response", "text" or "output" key of the JSON reply.
# NOTE(review): Ollama documents /api/generate (native) and /v1/completions
# (OpenAI-compatible); "/v1/complete" is not among them — confirm this path is
# served (e.g. by a proxy) before relying on it.
OLLAMA_URL = "http://ollama:11434/v1/complete"
|
|
|
|
|
|
async def _post_upstream(
    client: httpx.AsyncClient, service: str, url: str, **request_kwargs
) -> httpx.Response:
    """POST to an upstream service and return its successful response.

    Args:
        client: Open HTTP client used to send the request.
        service: Human-readable service name, used as the error-detail prefix.
        url: Endpoint to post to.
        **request_kwargs: Forwarded to ``client.post`` (e.g. ``json=`` or ``files=``).

    Returns:
        The upstream response, guaranteed to have status code 200.

    Raises:
        HTTPException: 502 when the service cannot be reached, times out, or
            answers with a non-200 status.
    """
    try:
        response = await client.post(url, timeout=120.0, **request_kwargs)
    except httpx.RequestError as exc:
        # Connection failures and timeouts would otherwise surface as a bare 500.
        raise HTTPException(
            status_code=502,
            detail=f"{service} error: {type(exc).__name__}: {exc}",
        ) from exc
    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"{service} error: {response.status_code} {response.text}",
        )
    return response


def _json_object(response: httpx.Response, service: str) -> dict:
    """Decode an upstream reply as a JSON object, or raise HTTP 502."""
    try:
        payload = response.json()
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        raise HTTPException(
            status_code=502, detail=f"{service} error: invalid JSON response"
        ) from exc
    if not isinstance(payload, dict):
        # The callers use .get(); anything but an object cannot be interpreted.
        raise HTTPException(
            status_code=502, detail=f"{service} error: unexpected JSON response"
        )
    return payload


@app.post("/chat")
async def chat(file: UploadFile = File(...)):
    """Answer a spoken question with synthesized German speech.

    The uploaded audio is transcribed by Whisper, the transcript is sent to
    Ollama as a prompt, and the model's answer is turned into audio by CoquiTTS.

    Args:
        file: Uploaded audio; its content type must start with ``audio``.

    Returns:
        A ``StreamingResponse`` carrying the CoquiTTS output as ``audio/wav``.

    Raises:
        HTTPException: 400 if the upload is not audio or is empty; 502 if any
            upstream service is unreachable, returns a non-200 status, or
            returns a body that is not a JSON object where one is expected.
    """
    # content_type is None when the client sends no Content-Type for the part.
    if not (file.content_type or "").startswith("audio"):
        raise HTTPException(status_code=400, detail="File must be audio")

    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    async with httpx.AsyncClient() as client:
        # Send audio to whisper. The bytes are already in memory, so they are
        # passed directly instead of going through a temporary file that would
        # have to be cleaned up afterwards.
        whisper_files = {"file": ("audio.wav", contents, "audio/wav")}
        whisper_reply = await _post_upstream(
            client, "Whisper", WHISPER_URL, files=whisper_files
        )
        text = _json_object(whisper_reply, "Whisper").get("text") or ""

        # Send text to ollama for reasoning.
        # We assume the Ollama HTTP API accepts JSON {"model": "<model>", "prompt": "..."}.
        # "stream": False requests a single JSON document; Ollama's generate API
        # otherwise streams newline-delimited chunks that .json() cannot decode.
        ollama_payload = {"model": "llama2", "prompt": text, "stream": False}
        ollama_reply = await _post_upstream(
            client, "Ollama", OLLAMA_URL, json=ollama_payload
        )
        answer_json = _json_object(ollama_reply, "Ollama")

        # Depending on API shape, try the known answer keys in order and fall
        # back to the raw payload so that something is always spoken.
        answer_text = (
            answer_json.get("response")
            or answer_json.get("text")
            or answer_json.get("output")
            or str(answer_json)
        )

        # Send answer to coquitts to generate German audio.
        coquitts_payload = {"text": answer_text, "language": "de"}
        tts_reply = await _post_upstream(
            client, "CoquiTTS", COQUITTS_URL, json=coquitts_payload
        )

        # client.post() has already read the whole body, so iterating it stays
        # valid after the client is closed on leaving this block.
        return StreamingResponse(tts_reply.aiter_bytes(), media_type="audio/wav")
|