Spaces:
Paused
Paused
Added Gradio app
Browse files
app.py
CHANGED
@@ -1,71 +1,10 @@
|
|
1 |
|
2 |
-
from peft import PeftModel, PeftConfig
|
3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
-
import torch
|
5 |
-
from transformers import AutoTokenizer
|
6 |
-
from peft import PeftModel, PeftConfig
|
7 |
-
|
8 |
-
# Load the adapter configuration; it is used below to pick the tokenizer.
config = PeftConfig.from_pretrained("TohidA/LlamaInstructMona")
# Load the base causal LM, then wrap it with the "TohidA/LlamaInstructMona"
# adapter.
# NOTE(review): the base checkpoint name is hard-coded here while the tokenizer
# is resolved from config.base_model_name_or_path -- presumably both refer to
# the same model; confirm they cannot drift apart.
model = AutoModelForCausalLM.from_pretrained("mlabonne/llama-2-7b-miniguanaco")
model = PeftModel.from_pretrained(model, "TohidA/LlamaInstructMona")

# Move the model to the GPU only when CUDA is available.
if torch.cuda.is_available():
    model = model.cuda()

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
|
16 |
-
|
17 |
-
def prompt(instruction, input=''):
    """Build the instruction prompt text.

    Args:
        instruction: Task description placed under ``### Instruction:``.
        input: Optional extra context placed under ``### Input:``. Both
            ``None`` and the empty string mean "no extra context".

    Returns:
        The prompt string, ending with the ``### Response:`` header. The
        variant without context omits the ``### Input:`` section entirely.
    """
    # Treat None like '' so a missing value never renders as the literal text
    # "None" inside an "### Input:" section.
    if input is None or input == '':
        return f"Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{instruction} \n\n### Response:\n"
    return f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
|
21 |
-
|
22 |
-
# Reuse the end-of-sequence token as the padding token (both the token string
# and its id).
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
|
24 |
-
|
25 |
-
def instruct(instruction, input='', temperature=0.7, top_p=0.95, top_k=4, max_new_tokens=128, do_sample=False, penalty_alpha=0.6, repetition_penalty=1., stop="\n\n"):
    """Generate a response for an instruction and return it with its prompt.

    Args:
        instruction: Task description passed to ``prompt``.
        input: Optional extra context passed to ``prompt``.
        temperature, top_p, top_k, max_new_tokens, do_sample,
            repetition_penalty: Forwarded unchanged to ``model.generate``.
        penalty_alpha: Accepted for interface compatibility; it is not
            forwarded to ``model.generate``.
        stop: Text at which the response is cut off. An empty string disables
            truncation.

    Returns:
        A ``(response, prompt_text)`` tuple: the decoded text following the
        ``### Response:`` marker (optionally truncated at ``stop``) and the
        prompt that was built for the request.
    """
    # Build the prompt once; it is both tokenized and returned to the caller.
    prompt_text = prompt(instruction, input)

    # Follow the model's own device rather than calling .cuda() unconditionally:
    # the model is only moved to the GPU when CUDA is available.
    device = next(model.parameters()).device
    input_ids = tokenizer(prompt_text.strip(), return_tensors='pt').input_ids.to(device)

    # Autocast is only enabled on CUDA; on any other device the context is a
    # no-op.
    with torch.autocast(device_type=device.type, enabled=device.type == "cuda"):
        outputs = model.generate(
            input_ids=input_ids,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            do_sample=do_sample,
            repetition_penalty=repetition_penalty,
        )

    # The decoded sequence still contains the prompt; keep what follows the
    # response marker.
    decoded = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    response = decoded.split("### Response:")[1].strip()

    # str.split("") raises ValueError, so an empty stop string means
    # "do not truncate".
    if stop != "":
        response = response.split(stop)[0].strip()
    return response, prompt_text
|
42 |
-
|
43 |
-
|
44 |
-
import locale

# Force UTF-8 for any code that asks the locale module for the preferred
# encoding. The replacement keeps the optional ``do_setlocale`` parameter of
# the real function so that calls such as locale.getpreferredencoding(False)
# keep working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"
|
46 |
-
|
47 |
import gradio as gr
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
# Gradio input widgets for the generation settings. Each variable is named
# after the instruct() parameter it corresponds to.
# NOTE(review): several widget defaults differ from instruct()'s own defaults
# (e.g. top_k 40 vs 4, do_sample True vs False, max_new_tokens 64 vs 128,
# stop "" vs "\n\n") -- confirm which values are intended.
temperature = gr.Slider(label="Temperature", minimum=0, maximum=1, value=0.7, step=0.05)
top_p = gr.Slider(label="Top-P", minimum=0, maximum=1, value=0.95, step=0.01)
top_k = gr.Slider(label="Top-K", minimum=0, maximum=128, value=40, step=1)
max_new_tokens = gr.Slider(label="Tokens", minimum=1, maximum=256, value=64)
do_sample = gr.Checkbox(label="Do Sample", value=True)
# These two sliders are created without an explicit label.
penalty_alpha = gr.Slider(minimum=0, maximum=1, value=0.5)
repetition_penalty = gr.Slider(minimum=1., maximum=2., value=1., step=0.1)
# An empty stop string makes instruct() return the response untruncated.
stop = gr.Textbox(label="Stopping Criteria", value="")
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
"""
|
65 |
-
gr.Interface(fn=instruct,
|
66 |
-
inputs=[instruction_text, input_text, temperature, top_p, top_k, max_new_tokens, do_sample, penalty_alpha, repetition_penalty, stop],
|
67 |
-
outputs=[output_text, output_prompt],
|
68 |
-
title="InstructLlamaMONA 7B Gradio Demo", description=description).launch(
|
69 |
-
debug=True,
|
70 |
-
share=True
|
71 |
-
)
|
|
|
1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
|
4 |
+
def greet(name):
    """Return the greeting ``Hello <name>!!`` for the given name string."""
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
# Text-in / text-out Gradio interface around greet().
iface = gr.Interface(fn=greet, inputs="text", outputs="text")

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|