init

2025-10-07 18:00:20 +02:00
commit 4793f1b183
11 changed files with 262 additions and 0 deletions
@@ -0,0 +1,15 @@
+# Runtime image for the FastAPI gateway implemented in server.py.
+FROM python:3.11-slim
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+
+# OS-level media packages. The apt lists are removed in the same layer so they
+# never end up in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+	ffmpeg \
+	libsndfile1 \
+ && rm -rf /var/lib/apt/lists/*
+
+# Unprivileged account for the server process. A fixed numeric UID/GID lets
+# orchestrators verify that the container does not run as root.
+RUN groupadd --system --gid 10001 app && \
+	useradd --system --uid 10001 --gid app --home-dir /app --no-create-home app
+
+# Dependencies are installed before the source is copied so this layer stays
+# cached until requirements.txt changes. --no-cache-dir on both pip calls keeps
+# the download cache out of the layer.
+COPY requirements.txt ./
+RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
+	pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=app:app server.py ./
+
+# Drop root only after every step that needs it (package installs) is done.
+USER app
+
+# Documents the port uvicorn binds to below; it does not publish the port.
+EXPOSE 8000
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,4 @@
+# Runtime dependencies of server.py, pinned to exact versions and kept in
+# alphabetical order.
+# NOTE(review): re-check these pins against current security advisories.
+fastapi==0.100.0
+httpx==0.24.1
+# Required by FastAPI to parse the multipart file upload accepted by /chat.
+python-multipart==0.0.6
+# ASGI server launched by the container CMD.
+uvicorn[standard]==0.22.0
@@ -0,0 +1,56 @@
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import StreamingResponse, JSONResponse
+import httpx
+import tempfile
+import shutil
+import asyncio
+
+app = FastAPI()
+
+WHISPER_URL = "http://whisper:8001/transcribe"
+COQUITTS_URL = "http://coquitts:8002/speak"
+OLLAMA_URL = "http://ollama:11434/v1/complete"
+
+
+@app.post("/chat")
+async def chat(file: UploadFile = File(...)):
+    if not file.content_type.startswith("audio"):
+        raise HTTPException(status_code=400, detail="File must be audio")
+
+    # save file to temp
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        contents = await file.read()
+        tmp.write(contents)
+        tmp.flush()
+        tmp_path = tmp.name
+
+    async with httpx.AsyncClient() as client:
+        # Send audio to whisper
+        with open(tmp_path, "rb") as f:
+            files = {"file": ("audio.wav", f, "audio/wav")}
+            r = await client.post(WHISPER_URL, files=files, timeout=120.0)
+
+        if r.status_code != 200:
+            raise HTTPException(status_code=502, detail=f"Whisper error: {r.status_code} {r.text}")
+
+        text = r.json().get("text", "")
+
+        # Send text to ollama for reasoning
+        # We assume Ollama HTTP API accepts JSON {"model":"<model>", "prompt":"..."}
+        ollama_payload = {"model": "llama2", "prompt": text}
+        ro = await client.post(OLLAMA_URL, json=ollama_payload, timeout=120.0)
+        if ro.status_code != 200:
+            raise HTTPException(status_code=502, detail=f"Ollama error: {ro.status_code} {ro.text}")
+
+        answer_json = ro.json()
+        # Depending on API shape, try to extract text
+        answer_text = answer_json.get("response") or answer_json.get("text") or answer_json.get("output") or str(answer_json)
+
+        # Send answer to coquitts to generate German audio
+        coquitts_payload = {"text": answer_text, "language": "de"}
+        co = await client.post(COQUITTS_URL, json=coquitts_payload, timeout=120.0)
+        if co.status_code != 200:
+            raise HTTPException(status_code=502, detail=f"CoquiTTS error: {co.status_code} {co.text}")
+
+        # stream the audio back
+        return StreamingResponse(co.aiter_bytes(), media_type="audio/wav")