KG0101 committed
Commit 9cf2ed0 · verified · Parent(s): 8bc28ec

Update app.py

Files changed (1):
  app.py +14 -17
app.py CHANGED
@@ -1,8 +1,8 @@
 import spaces
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, LlamaForCausalLM
-import bitsandbytes, flash_attn
+from transformers import pipeline
+from llama_cpp import Llama
 import os
 
 MODEL_NAME = "openai/whisper-large-v3-turbo"
@@ -19,16 +19,8 @@ pipe = pipeline(
     device=device,
 )
 
-# Load tokenizer and model for SOAP note generation
-tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B", trust_remote_code=True)
-model = LlamaForCausalLM.from_pretrained(
-    "NousResearch/Hermes-3-Llama-3.1-8B",
-    torch_dtype=torch.float16,
-    device_map="auto",
-    load_in_8bit=False,
-    load_in_4bit=True,
-    use_flash_attention_2=True
-)
+# Load the Llama model for SOAP note generation
+llm = Llama(model_path="model.gguf", n_ctx=8000, n_threads=2, chat_format="chatml")
 
 # Prompt for SOAP note generation
 sys_prompt = "You are a world class clinical assistant."
@@ -51,12 +43,17 @@ def transcribe(inputs, task):
     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return text
 
-# Function to generate SOAP notes using LLM
+# Function to generate SOAP notes using Llama model
 def generate_soap(transcribed_text):
-    prompt = f"<|im_start|>system\n{sys_prompt}<|im_end|>\n<|im_start|>user\n{task_prompt}\n{transcribed_text}<|im_end|>\n<|im_start|>assistant"
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
-    generated_ids = model.generate(input_ids, max_new_tokens=2048, temperature=0.8, repetition_penalty=1.1, do_sample=True, eos_token_id=tokenizer.eos_token_id)
-    response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_space=True)
+    prompt = [{"role": "system", "content": sys_prompt}]
+    prompt.append({"role": "user", "content": f"{task_prompt}\n{transcribed_text}"})
+
+    # Generate a response using the Llama model in streaming mode
+    stream_response = llm.create_chat_completion(messages=prompt, temperature=0.7, max_tokens=2048, stream=True)
+    response = ""
+    for chunk in stream_response:
+        if "content" in chunk['choices'][0]["delta"]:
+            response += chunk['choices'][0]["delta"]["content"]
     return response
 
 # Gradio Interfaces for different inputs
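
For context: the old path loaded NousResearch/Hermes-3-Llama-3.1-8B in 4-bit through transformers and bitsandbytes, while the new path expects a pre-quantized GGUF file (model.gguf) alongside app.py and runs it on two CPU threads via llama-cpp-python. Below is a minimal sketch of how such a file could be fetched and the new chat path exercised; the GGUF repo id and quant filename are assumptions for illustration, not something this commit pins down.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Assumed GGUF source; the commit itself only references a local "model.gguf".
gguf_path = hf_hub_download(
    repo_id="NousResearch/Hermes-3-Llama-3.1-8B-GGUF",  # hypothetical repo id
    filename="Hermes-3-Llama-3.1-8B.Q4_K_M.gguf",       # hypothetical quant file
)

# Same constructor arguments as the committed code, pointed at the download.
llm = Llama(model_path=gguf_path, n_ctx=8000, n_threads=2, chat_format="chatml")

# Non-streaming equivalent of the loop in generate_soap: one call returns
# the whole completion instead of incremental delta chunks.
result = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a world class clinical assistant."},
        {"role": "user", "content": "Write a SOAP note for: patient reports a mild headache."},
    ],
    temperature=0.7,
    max_tokens=2048,
)
print(result["choices"][0]["message"]["content"])

Streaming, as committed, yields the same text incrementally, which suits a live Gradio UI; the non-streaming form above is simpler when the full note is only needed once generation finishes.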