{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Speech round trip: TTS, Whisper transcription, GPT spelling correction\n",
    "\n",
    "1. Synthesize a short MP3 with the OpenAI speech endpoint.\n",
    "2. Transcribe the clip with `whisper-1`.\n",
    "3. Send the transcript to a chat model together with a system prompt and keep its reply as the corrected text.\n",
    "\n",
    "The API key is read from the `OPENAI` environment variable, which `load_dotenv()` can populate from a `.env` file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the synthesized clip: written by the TTS step, read back for transcription.\n",
    "AUDIO_PATH = Path(\"../data/output.mp3\")\n",
    "\n",
    "# If OPENAI is unset this is None; the OpenAI client then falls back to OPENAI_API_KEY.\n",
    "openai_key = os.getenv(\"OPENAI\")\n",
    "client = OpenAI(api_key=openai_key)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Synthesize speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "AUDIO_PATH.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Calling `stream_to_file` on a plain (non-streaming) response is deprecated: per its\n",
    "# DeprecationWarning it does not actually stream the content. Use the streaming wrapper.\n",
    "with client.audio.speech.with_streaming_response.create(\n",
    "    model=\"tts-1-hd\",\n",
    "    voice=\"alloy\",\n",
    "    input=\"My Name is Testimony[laughs]\",\n",
    ") as response:\n",
    "    response.stream_to_file(AUDIO_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Transcribe with Whisper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The context manager closes the file handle once the request has completed.\n",
    "with AUDIO_PATH.open(\"rb\") as audio_file:\n",
    "    text_response = client.audio.transcriptions.create(\n",
    "        model=\"whisper-1\",\n",
    "        file=audio_file,\n",
    "        response_format=\"text\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_response"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Correct the transcript with a chat model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def transcribe(audio_file):\n",
    "    \"\"\"Transcribe an open audio file with `whisper-1` and return the API result.\n",
    "\n",
    "    A fixed list of product names and acronyms is passed as the request's `prompt`\n",
    "    (intended as spelling hints for the transcription).\n",
    "\n",
    "    Args:\n",
    "        audio_file: File object opened in binary mode, e.g. `open(path, \"rb\")`.\n",
    "\n",
    "    Returns:\n",
    "        Whatever the API returns for `response_format=\"text\"`.\n",
    "    \"\"\"\n",
    "    return client.audio.transcriptions.create(\n",
    "        model=\"whisper-1\",\n",
    "        file=audio_file,\n",
    "        response_format=\"text\",\n",
    "        prompt=\"ZyntriQix, Digique Plus, CynapseFive, VortiQore V8, EchoNix Array, OrbitalLink Seven, DigiFractal Matrix, PULSE, RAPT, B.R.I.C.K., Q.U.A.R.T.Z., F.L.I.N.T.\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the saved notebook had a bare `system_prompt = ` here (a SyntaxError), so\n",
    "# the original wording is unknown. This text follows the post-processing example in\n",
    "# OpenAI's speech-to-text guide and reuses the product list from `transcribe`.\n",
    "# Adjust it if a different instruction was intended.\n",
    "system_prompt = (\n",
    "    \"You are a helpful assistant for the company ZyntriQix. Your task is to correct \"\n",
    "    \"any spelling discrepancies in the transcribed text. Make sure that the names of \"\n",
    "    \"the following products are spelled correctly: ZyntriQix, Digique Plus, \"\n",
    "    \"CynapseFive, VortiQore V8, EchoNix Array, OrbitalLink Seven, DigiFractal Matrix, \"\n",
    "    \"PULSE, RAPT, B.R.I.C.K., Q.U.A.R.T.Z., F.L.I.N.T. Only add necessary punctuation \"\n",
    "    \"such as periods, commas, and capitalization, and use only the context provided.\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_corrected_transcript(temperature, system_prompt, audio_file, model=\"gpt-3.5-turbo-16k\"):\n",
    "    \"\"\"Transcribe `audio_file` and return a chat model's reply to that transcript.\n",
    "\n",
    "    The transcript produced by `transcribe` is sent as the user message, with\n",
    "    `system_prompt` as the system message.\n",
    "\n",
    "    Args:\n",
    "        temperature: Sampling temperature forwarded to the chat completion request.\n",
    "        system_prompt: Instruction text sent with role \"system\".\n",
    "        audio_file: File object opened in binary mode; forwarded to `transcribe`.\n",
    "        model: Chat model name. Defaults to the value this notebook always used.\n",
    "            NOTE(review): confirm this model is still available on your account.\n",
    "\n",
    "    Returns:\n",
    "        The message content of the first completion choice.\n",
    "    \"\"\"\n",
    "    response = client.chat.completions.create(\n",
    "        model=model,\n",
    "        temperature=temperature,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": system_prompt},\n",
    "            {\"role\": \"user\", \"content\": transcribe(audio_file)},\n",
    "        ],\n",
    "    )\n",
    "    return response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-open the clip: the handle used for the first transcription is already closed.\n",
    "with AUDIO_PATH.open(\"rb\") as audio_file:\n",
    "    corrected_text = generate_corrected_transcript(0.7, system_prompt, audio_file=audio_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "corrected_text"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}