Create app.py
app.py (ADDED)
# app.py
import os
import asyncio
import base64
import io

import cv2
import pyaudio
import PIL.Image
import mss
import gradio as gr

from google import genai
from google.genai import types

# Audio and video capture config
FORMAT = pyaudio.paInt16
CHANNELS = 1
SEND_SAMPLE_RATE = 16000
RECEIVE_SAMPLE_RATE = 24000  # Gemini returns 24 kHz PCM audio
CHUNK_SIZE = 1024

MODEL = "models/gemini-2.0-flash-live-001"

# Initialize Google GenAI client
client = genai.Client(
    http_options={"api_version": "v1beta"},
    api_key=os.environ.get("GEMINI_API_KEY"),
)

# Shared PyAudio instance; _audio_stream below relies on it.
pya = pyaudio.PyAudio()

# Live connect configuration
CONFIG = types.LiveConnectConfig(
    response_modalities=["audio"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
        )
    ),
    system_instruction=types.Content(
        parts=[types.Part.from_text(text="You are Puck...")]
    ),
)

# AudioLoop class adapted for Gradio
class AudioLoop:
    def __init__(self, mode="camera"):
        self.mode = mode
        self.audio_in_queue = None
        self.out_queue = None
        self.session = None

    def _get_frame(self, cap):
        # Blocking OpenCV read; callers run this via asyncio.to_thread.
        ret, frame = cap.read()
        if not ret:
            return None
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = PIL.Image.fromarray(frame_rgb)
        img.thumbnail((640, 480))
        buf = io.BytesIO()
        img.save(buf, format="JPEG")
        return buf.getvalue()

    async def _video_stream(self):
        cap = await asyncio.to_thread(cv2.VideoCapture, 0)
        try:
            while True:
                frame = await asyncio.to_thread(self._get_frame, cap)
                if frame is None:
                    break
                await self.out_queue.put(
                    {"mime_type": "image/jpeg", "data": base64.b64encode(frame).decode()}
                )
                await asyncio.sleep(0.1)
        finally:
            cap.release()

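    # The UI offers a "screen" video source, but the original file never wired
    # one up (mss was imported unused). A minimal sketch, assuming the full
    # virtual screen (sct.monitors[0]) and JPEG frames matching _video_stream:
    async def _screen_stream(self):
        def grab():
            with mss.mss() as sct:
                shot = sct.grab(sct.monitors[0])
                img = PIL.Image.frombytes("RGB", shot.size, shot.rgb)
                buf = io.BytesIO()
                img.save(buf, format="JPEG")
                return buf.getvalue()

        while True:
            frame = await asyncio.to_thread(grab)
            await self.out_queue.put(
                {"mime_type": "image/jpeg", "data": base64.b64encode(frame).decode()}
            )
            await asyncio.sleep(1.0)
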
    async def _audio_stream(self):
        mic_info = pya.get_default_input_device_info()
        stream = await asyncio.to_thread(
            pya.open,  # reuse the module-level PyAudio instance
            format=FORMAT,
            channels=CHANNELS,
            rate=SEND_SAMPLE_RATE,
            input=True,
            input_device_index=mic_info["index"],
            frames_per_buffer=CHUNK_SIZE,
        )
        while True:
            data = await asyncio.to_thread(stream.read, CHUNK_SIZE, exception_on_overflow=False)
            await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})

    async def send_realtime(self):
        while True:
            msg = await self.out_queue.get()
            await self.session.send(input=msg)

    async def receive_audio(self):
        while True:
            turn = self.session.receive()
            async for response in turn:
                if data := response.data:
                    yield (None, data)
                if text := response.text:
                    yield (text, None)

    async def run(self):
        async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
            self.session = session
            self.audio_in_queue = asyncio.Queue()
            self.out_queue = asyncio.Queue(maxsize=5)

            tasks = []
            tasks.append(asyncio.create_task(self._audio_stream()))
            if self.mode == "camera":
                tasks.append(asyncio.create_task(self._video_stream()))
            elif self.mode == "screen":
                tasks.append(asyncio.create_task(self._screen_stream()))
            tasks.append(asyncio.create_task(self.send_realtime()))

            try:
                async for text, audio in self.receive_audio():
                    yield text, audio
            finally:
                # Cancel the capture/send tasks even if the consumer stops early.
                for t in tasks:
                    t.cancel()

# Gradio interface
async def chat(mode="camera"):
    """Starts a live session and streams the model's text into the chatbot."""
    history = []
    loop = AudioLoop(mode=mode)
    async for text, audio in loop.run():
        if text:
            history.append((None, text))
            yield history
        # `audio` carries 24 kHz PCM chunks; playing them back in the browser
        # would need a separate streaming gr.Audio output.


with gr.Blocks() as demo:
    gr.Markdown("# Gemini Live API Web Chat\nUse your microphone and camera directly from the browser.")
    mode = gr.Radio(choices=["camera", "screen", "none"], value="camera", label="Video Source")
    chatbot = gr.Chatbot()
    with gr.Row():
        start = gr.Button("Start")
        stop = gr.Button("Stop")
    # An async generator streams successive chatbot states; Stop cancels the
    # in-flight streaming event.
    start_event = start.click(chat, inputs=[mode], outputs=[chatbot])
    stop.click(fn=None, cancels=[start_event])

demo.launch(server_name="0.0.0.0", share=True)

# requirements.txt
#
# google-genai
# opencv-python
# pyaudio
# pillow
# mss
# gradio
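#
# A typical local run, assuming GEMINI_API_KEY is set in the environment:
#   pip install google-genai opencv-python pyaudio pillow mss gradio
#   python app.py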