# RAG3_Voice / clova_stt.py
# jeongsoo's picture
# Add voice recognition feature with Naver Clova API
# 14586a6
"""
๋„ค์ด๋ฒ„ ํด๋กœ๋ฐ” ์Œ์„ฑ์ธ์‹(STT) API ์—ฐ๋™ ๋ชจ๋“ˆ
"""
import json
import logging
import os
from typing import Any, Dict, Optional

import requests
from dotenv import load_dotenv
# Load the .env file so the os.getenv() lookups in ClovaSTT can see its values.
load_dotenv()
# Logging setup: module-level logger shared by the ClovaSTT class below.
logger = logging.getLogger("ClovaSTT")
class ClovaSTT:
    """
    Client for the Naver Clova speech recognition (STT) REST API.

    Credentials come from the constructor arguments or, when those are
    omitted, from the NAVER_CLIENT_ID / NAVER_CLIENT_SECRET environment
    variables.
    """

    # NOTE(review): the non-ASCII log/error messages in this class look
    # mis-encoded (presumably UTF-8 Korean decoded with the wrong codec).
    # They are kept byte-for-byte here; confirm the file encoding.

    # STT endpoint; the language is supplied via the ``lang`` query parameter.
    API_URL = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt"
    # Seconds to wait for the API before the request is abandoned.
    REQUEST_TIMEOUT = 30

    def __init__(
        self,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
    ):
        """
        Initialize the Clova STT client.

        Args:
            client_id: API key ID. When None, the NAVER_CLIENT_ID environment
                variable is used (empty string if unset).
            client_secret: API key secret. When None, the NAVER_CLIENT_SECRET
                environment variable is used (empty string if unset).
        """
        # Explicit arguments win; otherwise fall back to the environment.
        if client_id is None:
            client_id = os.getenv("NAVER_CLIENT_ID", "")
        if client_secret is None:
            client_secret = os.getenv("NAVER_CLIENT_SECRET", "")
        self.client_id = client_id
        self.client_secret = client_secret

        # Validate the credentials. Missing keys only produce warnings here;
        # recognize() refuses to send a request without them.
        if not self.client_id or not self.client_secret:
            logger.warning("๋„ค์ด๋ฒ„ ํด๋กœ๋ฐ” API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
            logger.warning(".env ํŒŒ์ผ์— NAVER_CLIENT_ID์™€ NAVER_CLIENT_SECRET๋ฅผ ์„ค์ •ํ•ด์ฃผ์„ธ์š”.")
        else:
            logger.info("๋„ค์ด๋ฒ„ ํด๋กœ๋ฐ” STT API ์„ค์ • ์™„๋ฃŒ")

    def recognize(self, audio_bytes: bytes, language: str = "Kor") -> Dict[str, Any]:
        """
        Convert audio data to text.

        Args:
            audio_bytes: Raw bytes of the audio file, sent as the request body.
            language: Language code sent as the ``lang`` query parameter
                (default: 'Kor').

        Returns:
            On success: {"success": True, "text": <recognized text>,
            "result": <decoded JSON response>}.
            On failure: {"success": False, "error": <message>}, plus a
            "details" entry for API error responses and caught exceptions.
            Request and response-parsing errors are caught and reported
            through the returned dictionary rather than raised.
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
            return {"success": False, "error": "API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}

        # Nothing to transcribe: fail fast instead of spending a network
        # round trip on a request that cannot succeed.
        if not audio_bytes:
            logger.error("Empty audio data passed to recognize().")
            return {"success": False, "error": "audio_bytes is empty"}

        # Request headers carrying the API credentials.
        headers = {
            "X-NCP-APIGW-API-KEY-ID": self.client_id,
            "X-NCP-APIGW-API-KEY": self.client_secret,
            "Content-Type": "application/octet-stream",
        }

        try:
            logger.info("๋„ค์ด๋ฒ„ ํด๋กœ๋ฐ” STT ์š”์ฒญ ์ „์†ก ์ค‘...")
            # ``params`` lets requests URL-encode the language code instead of
            # splicing it into the URL by hand.
            response = requests.post(
                self.API_URL,
                params={"lang": language},
                headers=headers,
                data=audio_bytes,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code != 200:
                logger.error(f"API ์˜ค๋ฅ˜ ์‘๋‹ต: {response.status_code}, {response.text}")
                return {
                    "success": False,
                    "error": f"API ์˜ค๋ฅ˜: {response.status_code}",
                    "details": response.text,
                }

            result = response.json()
            recognized_text = result.get("text", "")
            # Only the first 50 characters are logged to keep log lines short.
            logger.info(f"์ธ์‹ ์„ฑ๊ณต: {recognized_text[:50]}...")
            return {
                "success": True,
                "text": recognized_text,
                "result": result,
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an
            # exception. logger.exception keeps the traceback in the log.
            logger.exception(f"์Œ์„ฑ์ธ์‹ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
            return {
                "success": False,
                "error": "์Œ์„ฑ์ธ์‹ ์ฒ˜๋ฆฌ ์‹คํŒจ",
                "details": str(e),
            }