94 lines
3.4 KiB
Python
94 lines
3.4 KiB
Python
from fastapi import FastAPI, File, UploadFile, HTTPException
|
|
from fastapi.responses import JSONResponse
|
|
import whisper
|
|
import tempfile
|
|
import shutil
|
|
import os
|
|
import logging
|
|
from pydub import AudioSegment
|
|
|
|
# Configure the root logger at INFO so the request/conversion messages
# logged by the handlers in this module are emitted.
logging.basicConfig(level=logging.INFO)

app = FastAPI()


def _load_whisper_model(name):
    """Load the named Whisper model, logging and re-raising any failure.

    The exception is deliberately propagated: because this runs at import
    time, a model that cannot be loaded aborts startup so the container
    fails fast instead of serving requests without a model.
    """
    try:
        return whisper.load_model(name)
    except Exception:
        logging.exception("Failed to load Whisper model")
        raise


# Loaded once at import time and shared by every request handler.
model = _load_whisper_model("small")
|
|
|
|
|
|
def convert_to_wav(src_path: str) -> str:
    """Convert an audio file (webm/ogg/mp3/...) to a 16 kHz mono WAV file.

    Decoding and re-encoding are delegated to pydub (which drives ffmpeg).

    Args:
        src_path: Path of the audio file to convert.

    Returns:
        Path to the newly created WAV file. The caller is responsible for
        deleting it.

    Raises:
        Exception: Whatever pydub/ffmpeg raises while decoding ``src_path``
            or writing the WAV. If the export step fails, the reserved
            output file is removed before the error propagates, so a failed
            conversion leaves no temp file behind.
    """
    # Decode and resample first: a bad input then fails before any output
    # file has been created.
    audio = AudioSegment.from_file(src_path)
    audio = audio.set_frame_rate(16000).set_channels(1)

    # Reserve a unique output path. Our own handle is closed immediately so
    # that export() can reopen the file by name.
    wav_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    wav_tmp.close()
    wav_path = wav_tmp.name

    try:
        exported = audio.export(wav_path, format="wav")
        # pydub returns the open output file handle; close it so the
        # descriptor is released now rather than at garbage collection.
        if exported is not None:
            exported.close()
    except Exception:
        # The file was created with delete=False, so nothing else will
        # remove it if the export fails part-way.
        try:
            os.remove(wav_path)
        except OSError:
            logging.exception("Failed to remove temp file %s", wav_path)
        raise

    return wav_path
|
|
|
|
|
|
# Uploads are copied to disk in pieces of this size so the whole file never
# has to be held in memory at once.
_UPLOAD_CHUNK_SIZE = 1024 * 1024


def _guess_extension(filename: str, content_type: str) -> str:
    """Return the upload's own extension, or infer one from its content type.

    Falls back to ".wav" when neither the filename nor the content type
    gives a recognisable hint.
    """
    ext = os.path.splitext(filename)[1]
    if ext:
        return ext
    if "webm" in content_type:
        return ".webm"
    if "ogg" in content_type or "opus" in content_type:
        return ".ogg"
    if "mpeg" in content_type or "mp3" in content_type:
        return ".mp3"
    return ".wav"


def _remove_temp_files(paths) -> None:
    """Best-effort removal of temp files; failures are logged, never raised."""
    for path in paths:
        if not path:
            continue
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone, which is the state we want.
            pass
        except Exception:
            logging.exception("Failed to remove temp file %s", path)


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is written to a temp file, converted to WAV unless its
    extension is already ".wav", and passed to ``model.transcribe``. All temp
    files are removed before the handler returns, on success or failure.

    Args:
        file: The uploaded file; its content type must start with "audio".

    Returns:
        A JSON response of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 if the upload is not audio, is empty, or cannot
            be converted to WAV; 500 if the model raises ``RuntimeError``
            while transcribing.
    """
    content_type = file.content_type
    if not content_type or not content_type.startswith("audio"):
        raise HTTPException(status_code=400, detail="File must be audio")

    filename = file.filename or "upload"
    # Preserve the original extension if possible so the decoder gets a
    # format hint.
    ext = _guess_extension(filename, content_type)

    tmp_path = None
    wav_path = None
    try:
        # The temp file is created inside the try block so that a failed
        # read or write is still cleaned up by the finally clause.
        size = 0
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp_path = tmp.name
            while True:
                chunk = await file.read(_UPLOAD_CHUNK_SIZE)
                if not chunk:
                    break
                tmp.write(chunk)
                size += len(chunk)

        logging.info(
            "Received upload %s (%d bytes, content-type=%s)",
            filename,
            size,
            content_type,
        )

        if size == 0:
            # Reject up front instead of surfacing an opaque decoder error.
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        if tmp_path.lower().endswith(".wav"):
            wav_path = tmp_path
        else:
            # Non-WAV uploads are converted first to ensure ffmpeg/pydub
            # compatibility.
            try:
                wav_path = convert_to_wav(tmp_path)
            except Exception as e:
                logging.exception("Failed to convert uploaded audio to wav")
                # Surface the underlying ffmpeg/pydub message to the client.
                raise HTTPException(
                    status_code=400, detail=f"Failed to convert audio: {e}"
                ) from e
            else:
                logging.info(
                    "Converted to wav: %s (size=%d)",
                    wav_path,
                    os.path.getsize(wav_path),
                )

        # NOTE(review): model.transcribe is a blocking call inside an async
        # handler, so the event loop is stalled while it runs. Offloading it
        # to a worker thread would first require confirming that the shared
        # model is safe for concurrent use.
        try:
            result = model.transcribe(wav_path, language=None)
        except RuntimeError as e:
            # Likely ffmpeg failed while loading the audio; include the
            # message for debugging.
            logging.exception("Whisper failed to transcribe audio")
            raise HTTPException(status_code=500, detail=str(e)) from e

        return JSONResponse({"text": result.get("text", "")})
    finally:
        # A set de-duplicates the case where the upload itself was the WAV.
        _remove_temp_files({tmp_path, wav_path})
|