# Gradio question-answering demo for the Lohith9459/QnAD2_gemma7b model.
# (NOTE: lines of scraped web-page residue — Spaces header, commit hashes,
# and a line-number gutter — were removed here; they were not Python.)
import gradio as gr
import torch
from unsloth import FastLanguageModel
from transformers import TextStreamer
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model to serve; earlier experiments used Unsloth 4-bit checkpoints
# ("ssirikon/Gemma7b-bnb-Unsloth", "unsloth/gemma-7b-bnb-4bit").
MODEL_NAME = "Lohith9459/QnAD2_gemma7b"
# NOTE(review): the three settings below are leftovers from the Unsloth
# FastLanguageModel loading path and are NOT used by the active
# AutoModelForCausalLM loader — confirm before deleting.
max_seq_length = 512
dtype = torch.bfloat16
load_in_4bit = True
# Load model weights in bfloat16 and let accelerate place them on the
# available device(s); tokenizer comes from the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def extract_response(text):
    """Return the text after the '### Response:' marker, or None if absent.

    The model echoes the full Alpaca-style prompt; only the part after
    '### Response:' is the generated answer.
    """
    start_tag = "### Response:"
    start_idx = text.find(start_tag)
    if start_idx == -1:
        return None  # marker not found in the model output
    return text[start_idx + len(start_tag):].strip()


def generate_answer(question):
    """Answer *question* with the loaded model, streaming tokens to stdout.

    Builds an Alpaca-style prompt, generates up to 512 new tokens, and
    returns only the text following the '### Response:' marker
    (None if the marker is missing from the decoded output).
    """
    instruction = "Generate an answer for the following question in less than two sentences."
    # Prompt text must match the fine-tuning template exactly — do not edit.
    formatted_text = f"""Below is an instruction that describes a task. \
Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{question}
### Response:
"""
    # Fix: move inputs to wherever accelerate placed the model instead of
    # hard-coding "cuda", which crashed on CPU-only hosts.
    inputs = tokenizer([formatted_text], return_tensors="pt").to(model.device)
    text_streamer = TextStreamer(tokenizer)
    generated_ids = model.generate(**inputs, streamer=text_streamer, max_new_tokens=512)
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return extract_response(generated_text)
# Build the Gradio UI: one multi-line question box in, one answer box out.
demo = gr.Interface(
    fn=generate_answer,
    inputs=gr.Textbox(lines=5, label="Ask Question on AI/ML"),
    outputs=gr.Textbox(label="G-15 Gemma7b Model Generated Answer"),
)
# Fix: removed the stray trailing " |" paste artifact that made this line
# a SyntaxError.
demo.launch()