mvp
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
fastapi==0.100.0
|
||||
uvicorn[standard]==0.22.0
|
||||
whisper==1.1.10
|
||||
openai-whisper
|
||||
pydub==0.25.1
|
||||
aiofiles==23.1.0
|
||||
python-multipart==0.0.6
|
||||
|
||||
+73
-12
@@ -3,30 +3,91 @@ from fastapi.responses import JSONResponse
|
||||
import whisper
|
||||
import tempfile
|
||||
import shutil
|
||||
import os
|
||||
import logging
|
||||
from pydub import AudioSegment
|
||||
|
||||
logging.basicConfig(level=logging.INFO)

app = FastAPI()

# Load the Whisper model exactly once, at import time, so every request
# reuses the same instance. The load is guarded so that a failure is logged
# with a traceback before the process exits.
try:
    model = whisper.load_model("small")
except Exception:
    logging.exception("Failed to load Whisper model")
    # re-raise so container fails fast if model can't be loaded
    raise
|
||||
|
||||
|
||||
def convert_to_wav(src_path: str) -> str:
    """Convert an audio file (webm/ogg/mp3/...) to a 16 kHz mono WAV file.

    The source is decoded with pydub/ffmpeg, resampled to 16 kHz, reduced to
    a single channel and exported to a freshly created temporary file.

    Args:
        src_path: Path of the audio file to convert.

    Returns:
        Path to the new WAV file. The caller is responsible for cleanup.

    Raises:
        Exception: Whatever pydub raises while decoding or exporting. When
            the export fails, the temporary output file is removed before
            the error propagates, because the caller never learns its path.
    """
    audio = AudioSegment.from_file(src_path)
    audio = audio.set_frame_rate(16000).set_channels(1)

    # Only the unique path is needed here: the handle is closed immediately
    # and the export below writes to the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_tmp:
        wav_path = wav_tmp.name

    try:
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave an empty or partial file behind on failure.
        try:
            os.remove(wav_path)
        except OSError:
            logging.exception("Failed to remove temp file %s", wav_path)
        raise
    return wav_path
|
||||
|
||||
|
||||
def _guess_extension(filename: str, content_type: str) -> str:
    """Pick a temp-file suffix from the upload's name, else its content type."""
    ext = os.path.splitext(filename)[1]
    if len(ext) > 1:  # a bare trailing "." carries no format information
        return ext
    # try to infer common extension from content-type
    for markers, guessed in (
        (("webm",), ".webm"),
        (("ogg", "opus"), ".ogg"),
        (("mpeg", "mp3"), ".mp3"),
    ):
        if any(marker in content_type for marker in markers):
            return guessed
    return ".wav"


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the recognised text.

    The upload is written to a temporary file, converted to WAV when its
    suffix is not ``.wav``, and passed to the module-level Whisper model.
    All temporary files are removed before the handler returns or raises.

    Args:
        file: The uploaded file; its content type must start with ``audio``.

    Returns:
        A ``JSONResponse`` of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 if the upload is not audio or cannot be converted
            to WAV; 500 if the model raises ``RuntimeError`` while
            transcribing.
    """
    # content_type may be missing entirely, so guard before startswith().
    if not file.content_type or not file.content_type.startswith("audio"):
        raise HTTPException(status_code=400, detail="File must be audio")

    # preserve original extension if possible
    filename = file.filename or "upload"
    ext = _guess_extension(filename, file.content_type)

    tmp_path = None
    wav_path = None
    try:
        # The write happens inside the try so a failed read/write still
        # reaches the cleanup in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp_path = tmp.name
            contents = await file.read()
            tmp.write(contents)
        logging.info(
            "Received upload %s (%d bytes, content-type=%s)",
            filename,
            len(contents),
            file.content_type,
        )

        # If the uploaded file is not a WAV, convert it to WAV first to
        # ensure ffmpeg/pydub compatibility.
        wav_path = tmp_path
        if not tmp_path.lower().endswith(".wav"):
            try:
                wav_path = convert_to_wav(tmp_path)
            except Exception as e:
                logging.exception("Failed to convert uploaded audio to wav")
                # Surface the underlying error text to help the client debug.
                raise HTTPException(
                    status_code=400, detail=f"Failed to convert audio: {e}"
                ) from e
            logging.info(
                "Converted to wav: %s (size=%d)",
                wav_path,
                os.path.getsize(wav_path),
            )

        # NOTE(review): this is a synchronous call inside an async handler;
        # presumably it blocks the event loop while it runs — confirm whether
        # it should be offloaded to a worker thread.
        try:
            result = model.transcribe(wav_path, language=None)
        except RuntimeError as e:
            # likely ffmpeg failed while loading audio; include error message
            # for debugging
            logging.exception("Whisper failed to transcribe audio")
            raise HTTPException(status_code=500, detail=str(e)) from e

        return JSONResponse({"text": result.get("text", "")})
    finally:
        # cleanup temp files; the set collapses the case where no conversion
        # happened and both names refer to the same path.
        for path in {tmp_path, wav_path}:
            if not path:
                continue
            try:
                if os.path.exists(path):
                    os.remove(path)
            except Exception:
                logging.exception("Failed to remove temp file %s", path)
|
||||
|
||||
Reference in New Issue
Block a user