mvp

2025-10-08 15:23:23 +02:00
parent b59f52cf86
commit 5e6eae61cc
8 changed files with 288 additions and 70 deletions
@@ -1,6 +1,6 @@
 fastapi==0.100.0
 uvicorn[standard]==0.22.0
-whisper==1.1.10
+openai-whisper
 pydub==0.25.1
 aiofiles==23.1.0
 python-multipart==0.0.6
@@ -3,30 +3,91 @@ from fastapi.responses import JSONResponse
 import whisper
 import tempfile
 import shutil
+import os
+import logging
+from pydub import AudioSegment

+# Configure the root logger (used by the logging.* calls in this module) at INFO level.
+logging.basicConfig(level=logging.INFO)
 app = FastAPI()

-model = whisper.load_model("small")
+# Load the Whisper "small" model once at import time; the /transcribe handler
+# reuses this module-level instance for every request.
+try:
+    model = whisper.load_model("small")
+except Exception:
+    logging.exception("Failed to load Whisper model")
+    # Re-raise so the process (e.g. a container) fails fast instead of
+    # starting without a usable model.
+    raise
+
+
+def convert_to_wav(src_path: str) -> str:
+    """Convert an audio file to a 16 kHz mono WAV file using pydub/ffmpeg.
+
+    Args:
+        src_path: Path to the source audio file (webm/ogg/mp3/...).
+
+    Returns:
+        Path to the newly created WAV file. The caller is responsible for
+        deleting it.
+
+    Raises:
+        Exception: Whatever pydub raises while decoding or exporting. If the
+            export fails, the temporary WAV file is removed before the error
+            propagates, because the caller never learns its path.
+    """
+    audio = AudioSegment.from_file(src_path)
+    audio = audio.set_frame_rate(16000).set_channels(1)
+    # Only a unique path is needed here; close the handle right away and let
+    # the export below write to the file by name.
+    wav_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+    wav_tmp.close()
+    try:
+        audio.export(wav_tmp.name, format="wav")
+    except Exception:
+        # delete=False means nothing else will clean this file up on failure.
+        try:
+            os.remove(wav_tmp.name)
+        except OSError:
+            logging.exception("Failed to remove temp file %s", wav_tmp.name)
+        raise
+    return wav_tmp.name


 @app.post("/transcribe")
 async def transcribe(file: UploadFile = File(...)):
+    # Accept one uploaded audio file, transcribe it with the module-level
+    # Whisper model and return JSON of the form {"text": ...}.
+    # Raises HTTP 400 when the upload is not audio or cannot be converted to
+    # WAV, and HTTP 500 when Whisper raises RuntimeError while transcribing.
-    if not file.content_type.startswith("audio"):
+    # Reject a missing/empty content type as well as any non-audio one.
+    if not file.content_type or not file.content_type.startswith("audio"):
         raise HTTPException(status_code=400, detail="File must be audio")

-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+    # Preserve the original extension if possible so the temp file's suffix
+    # reflects the uploaded format.
+    filename = file.filename or "upload"
+    ext = os.path.splitext(filename)[1] or ""
+    if not ext:
+        # No extension in the filename: infer a common one from the content type.
+        if "webm" in file.content_type:
+            ext = ".webm"
+        elif "ogg" in file.content_type or "opus" in file.content_type:
+            ext = ".ogg"
+        elif "mpeg" in file.content_type or "mp3" in file.content_type:
+            ext = ".mp3"
+        else:
+            # Unrecognized audio subtype: fall back to .wav, which also means
+            # the conversion step below is skipped for this upload.
+            ext = ".wav"
+
+    # delete=False keeps the file on disk after the with-block closes it; it is
+    # removed in the finally clause at the end of this handler.
+    # The whole upload is read into memory before being written out.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
         contents = await file.read()
         tmp.write(contents)
         tmp.flush()
         tmp_path = tmp.name

-    try:
-        result = model.transcribe(tmp_path, language=None)
-        text = result.get("text", "")
-    finally:
-        try:
-            shutil.os.remove(tmp_path)
-        except Exception:
-            pass
+    logging.info("Received upload %s (%d bytes, content-type=%s)", filename, os.path.getsize(tmp_path), file.content_type)

-    return JSONResponse({"text": text})
+    # If the temp file's name does not end in .wav, convert it to WAV with
+    # convert_to_wav before handing it to Whisper; a conversion failure is
+    # reported to the client as HTTP 400.
+    wav_path = tmp_path
+    # NOTE(review): "converted" is assigned but never read in the code shown here.
+    converted = False
+    try:
+        if not tmp_path.lower().endswith('.wav'):
+            try:
+                wav_path = convert_to_wav(tmp_path)
+                converted = True
+                logging.info("Converted to wav: %s (size=%d)", wav_path, os.path.getsize(wav_path))
+            except Exception as e:
+                # Conversion failed: log the traceback and return a 400 whose
+                # detail includes the underlying ffmpeg/pydub error text.
+                logging.exception("Failed to convert uploaded audio to wav")
+                # The exception message is embedded in the response detail.
+                raise HTTPException(status_code=400, detail=f"Failed to convert audio: {e}")

+        try:
+            # NOTE(review): this is a synchronous call inside an async handler,
+            # so the event loop is blocked until transcription finishes.
+            result = model.transcribe(wav_path, language=None)
+            text = result.get("text", "")
+        except RuntimeError as e:
+            # Likely ffmpeg failed while loading the audio; include the error
+            # message in the 500 response for debugging.
+            logging.exception("Whisper failed to transcribe audio")
+            raise HTTPException(status_code=500, detail=str(e))

+        return JSONResponse({"text": text})
+    finally:
+        # Clean up temp files. A set is used so the path is only handled once
+        # when no conversion happened (wav_path == tmp_path).
+        for path in {tmp_path, wav_path}:
+            try:
+                if path and os.path.exists(path):
+                    os.remove(path)
+            except Exception:
+                logging.exception("Failed to remove temp file %s", path)