import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
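# Fetch the adapter config first; it records which base checkpoint the
# LoRA weights were trained against.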
peft_model_id = "Ngadou/falcon-7b-scam-buster"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    trust_remote_code=True,
    return_dict=True,
    # 4-bit quantization (requires bitsandbytes); newer transformers versions
    # prefer quantization_config=BitsAndBytesConfig(load_in_4bit=True).
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Attach the fine-tuned LoRA adapter on top of the quantized base model.
# device_map="auto" has already dispatched the weights, so no explicit
# .to("cuda"): moving a 4-bit quantized model raises an error in recent
# transformers versions.
model = PeftModel.from_pretrained(model, peft_model_id)
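# Inference only: put the model in eval mode to disable dropout.
model.eval()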
def is_scam(instruction):
    """Classify a conversation as a scam (or not) with the fine-tuned adapter."""
    # Decoding hyperparameters.
    max_new_tokens = 128
    temperature = 0.1
    top_p = 0.75
    top_k = 40
    num_beams = 4

    # Rebuild the instruction/solution prompt format used during fine-tuning.
    instruction = instruction + "\n Is this conversation a scam or not and why?"
    prompt = instruction + "\n### Solution:\n"

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to("cuda")
    attention_mask = inputs["attention_mask"].to("cuda")

    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
    )

    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
            early_stopping=True,
        )

    # Keep only the model's answer, i.e. everything after the "### Solution:" marker.
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    classification = output.split("### Solution:")[1].lstrip("\n")
    print(classification)
    # The interface declares two text outputs; the second slot is a placeholder.
    return str(classification), " "
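# Optional, hypothetical smoke test (the sample message below is made up):
# call the classifier directly to sanity-check generation before launching
# the UI. Uncomment to run; assumes a CUDA device and a successful load above.
#
# label, _ = is_scam("Congratulations, you won a lottery! Reply with a gift card code to claim your prize.")
# print(label)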
gr.Interface(
    fn=is_scam,
    inputs="text",
    outputs=["text", "text"],
).launch()