"""Flask service that turns an uploaded audio clip into a spoken reply.

Pipeline for ``POST /process_audio``:

1. transcribe the uploaded audio with a Hugging Face ``pipeline``,
2. send the transcript to an OpenAI chat model (instructed to answer in
   Swahili),
3. synthesize the answer to an MP3 with gTTS (language code ``sw``),
4. return the answer text plus a URL from which the MP3 can be downloaded.
"""
import os
import tempfile
import uuid

import openai
from flask import Flask, jsonify, request, send_file, send_from_directory  # noqa: F401
from gtts import gTTS
from openai import OpenAI
from transformers import pipeline

app = Flask(__name__)

# NOTE: OpenAI() reads OPENAI_API_KEY from the environment on construction;
# the module-level assignment below is kept for code that still relies on the
# legacy ``openai.api_key`` global.
client = OpenAI()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Loaded once at import time so every request reuses the same model instance.
pipe = pipeline(model="seeafricatz/kiaziboraasr")

# Only files inside this directory are ever served by the /audio/ endpoint.
AUDIO_OUTPUT_DIR = os.path.join(tempfile.gettempdir(), "tts_audio")


def transcribe(audio_path):
    """Return the text recognized in the audio file at *audio_path*.

    The file is passed to the module-level ``pipe`` and the ``"text"`` entry
    of its result is returned.
    """
    text = pipe(audio_path)["text"]
    return text


def generate_response(transcribed_text):
    """Ask the chat model to answer *transcribed_text* and return its reply.

    Returns the ``content`` of the first choice in the completion.
    """
    # The v1 OpenAI client exposes chat completions as
    # ``client.chat.completions``; ``client.chat_completions`` does not exist.
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "All your answers should be in Swahili only..."},
            {"role": "user", "content": transcribed_text},
        ],
        model="gpt-4",
    )
    return response.choices[0].message.content


def inference(text):
    """Synthesize *text* to speech and return the path of the saved MP3.

    Each call writes to a fresh, uniquely named file inside
    ``AUDIO_OUTPUT_DIR`` so that concurrent requests cannot overwrite each
    other's output.
    """
    # Created lazily so importing the module has no extra filesystem effects.
    os.makedirs(AUDIO_OUTPUT_DIR, exist_ok=True)
    output_file = os.path.join(AUDIO_OUTPUT_DIR, f"{uuid.uuid4().hex}.mp3")
    tts = gTTS(text, lang='sw')
    tts.save(output_file)
    return output_file


@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Handle an uploaded audio clip and respond with text and an audio URL.

    Expects a multipart upload with the file under the ``audio`` field.
    Responds with HTTP 400 and an ``error`` message when that field is
    missing; otherwise with JSON containing ``response_text`` and
    ``response_audio_url``.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    audio_file = request.files['audio']

    # A unique temp file per request avoids concurrent uploads clobbering a
    # shared fixed path; it is removed once transcription is done (or fails).
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(audio_path)
        transcribed_text = transcribe(audio_path)
    finally:
        os.remove(audio_path)

    response_text = generate_response(transcribed_text)
    output_file = inference(response_text)

    # TODO: generated MP3 files are never deleted; add a cleanup policy.
    return jsonify({
        'response_text': response_text,
        # Expose only the bare file name; the directory stays server-side.
        'response_audio_url': f'/audio/{os.path.basename(output_file)}',
    })


@app.route('/audio/<filename>')
def audio(filename):
    """Send the generated audio file named *filename* as a download.

    The lookup is confined to ``AUDIO_OUTPUT_DIR``; names that are missing or
    that would escape that directory result in a 404 response.
    """
    # send_from_directory safely joins the client-supplied name onto the
    # directory, unlike send_file(filename), which would serve any path
    # the client names.
    return send_from_directory(AUDIO_OUTPUT_DIR, filename, as_attachment=True)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8000)