KG0101 committed on
Commit
3de905c
·
verified ·
1 Parent(s): 9cf2ed0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -1,16 +1,25 @@
1
  import spaces
2
  import torch
3
  import gradio as gr
4
- from transformers import pipeline
5
  from llama_cpp import Llama
6
  import os
7
 
8
  MODEL_NAME = "openai/whisper-large-v3-turbo"
 
9
  BATCH_SIZE = 8
10
  FILE_LIMIT_MB = 1000
11
 
12
  device = 0 if torch.cuda.is_available() else "cpu"
13
 
 
 
 
 
 
 
 
 
14
  # Initialize the transcription pipeline
15
  pipe = pipeline(
16
  task="automatic-speech-recognition",
@@ -19,9 +28,6 @@ pipe = pipeline(
19
  device=device,
20
  )
21
 
22
- # Load the Llama model for SOAP note generation
23
- llm = Llama(model_path="model.gguf", n_ctx=8000, n_threads=2, chat_format="chatml")
24
-
25
  # Prompt for SOAP note generation
26
  sys_prompt = "You are a world class clinical assistant."
27
  task_prompt = """
 
1
  import spaces
2
  import torch
3
  import gradio as gr
4
+ from transformers import pipeline, AutoModel, AutoTokenizer
5
  from llama_cpp import Llama
6
  import os
7
 
8
  MODEL_NAME = "openai/whisper-large-v3-turbo"
9
+ MODEL_PATH = "model.gguf" # Path to the downloaded model
10
  BATCH_SIZE = 8
11
  FILE_LIMIT_MB = 1000
12
 
13
  device = 0 if torch.cuda.is_available() else "cpu"
14
 
15
+ # Download and load model if not already present
16
+ if not os.path.exists(MODEL_PATH):
17
+ from huggingface_hub import hf_hub_download
18
+ hf_hub_download(repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF", filename="model.gguf", local_dir="./")
19
+
20
+ # Load the Llama model with specified context and threading
21
+ llm = Llama(model_path=MODEL_PATH, n_ctx=8000, n_threads=2, chat_format="chatml")
22
+
23
  # Initialize the transcription pipeline
24
  pipe = pipeline(
25
  task="automatic-speech-recognition",
 
28
  device=device,
29
  )
30
 
 
 
 
31
  # Prompt for SOAP note generation
32
  sys_prompt = "You are a world class clinical assistant."
33
  task_prompt = """