LocalScribe1

Running on Zero

KG0101 committed on Nov 5, 2024

Commit

3de905c

verified ·

1 Parent(s): 9cf2ed0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,16 +1,25 @@
 import spaces
 import torch
 import gradio as gr
-from transformers import pipeline
 from llama_cpp import Llama
 import os
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 device = 0 if torch.cuda.is_available() else "cpu"
 # Initialize the transcription pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
@@ -19,9 +28,6 @@ pipe = pipeline(
     device=device,
 )
-# Load the Llama model for SOAP note generation
-llm = Llama(model_path="model.gguf", n_ctx=8000, n_threads=2, chat_format="chatml")
 # Prompt for SOAP note generation
 sys_prompt = "You are a world class clinical assistant."
 task_prompt = """

 import spaces
 import torch
 import gradio as gr
+from transformers import pipeline, AutoModel, AutoTokenizer
 from llama_cpp import Llama
 import os
 MODEL_NAME = "openai/whisper-large-v3-turbo"
+MODEL_PATH = "model.gguf"  # Path to the downloaded model
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 device = 0 if torch.cuda.is_available() else "cpu"
+# Download the model file if it is not already present locally
+if not os.path.exists(MODEL_PATH):
+    from huggingface_hub import hf_hub_download
+    hf_hub_download(repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF", filename="model.gguf", local_dir="./")
+# Load the Llama model with specified context and threading
+llm = Llama(model_path=MODEL_PATH, n_ctx=8000, n_threads=2, chat_format="chatml")
 # Initialize the transcription pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
     device=device,
 )
 # Prompt for SOAP note generation
 sys_prompt = "You are a world class clinical assistant."
 task_prompt = """