RinInori committed on
Commit
f474fd0
1 Parent(s): f230d96

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from peft import PeftModel
3
+ import transformers
4
+ import gradio as gr
5
+ from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
6
+ from transformers import Trainer
7
+
8
# Hub identifier of the checkpoint; used for both the weights and the tokenizer.
BASE_MODEL = "TheBloke/stable-vicuna-13B-HF"

# Load the weights with 8-bit loading enabled, float16 as the torch dtype,
# and automatic device placement.
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
# Padding uses token id 0 and is applied on the left.
# NOTE(review): id 0 is presumably the tokenizer's <unk> token -- confirm.
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
13
+
14
def format_prompt(prompt: str) -> str:
    """Wrap a user message in the "### Human / ### Assistant" chat template.

    Args:
        prompt: The raw user message. It is inserted verbatim, with no
            escaping or trimming.

    Returns:
        The templated text, ending with the "### Assistant:" cue so that the
        model's continuation is the assistant's reply.
    """
    return f"### Human: {prompt}\n### Assistant:"
16
+
17
# Decoding settings shared by every request.
# `temperature` only influences decoding when sampling is enabled; without
# `do_sample=True` generation is greedy and the temperature is ignored, so
# sampling is switched on explicitly here.
generation_config = GenerationConfig(
    max_new_tokens=128,
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.0,
)
22
+
23
def generate_text(prompt: str):
    """Generate the assistant's reply to a single user message.

    The message is wrapped with ``format_prompt``, tokenized without padding
    or added special tokens, and passed to ``model.generate`` using the
    module-level ``generation_config``.

    Args:
        prompt: The raw user message.

    Returns:
        The newly generated text, with special tokens removed and surrounding
        whitespace stripped.
    """
    formatted_prompt = format_prompt(prompt)

    inputs = tokenizer(
        formatted_prompt,
        padding=False,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    with torch.inference_mode():
        tokens = model.generate(**inputs, generation_config=generation_config)

    # The generated sequence starts with the prompt tokens. Drop them by
    # length instead of searching the decoded text for "### Assistant:":
    # a text search picks the wrong position when the user's message itself
    # contains that marker, and yields a bogus offset when it is not found.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokens[0][prompt_length:]
    return tokenizer.decode(completion, skip_special_tokens=True).strip()
39
+
40
# Minimal Gradio UI: one text input wired to generate_text, one text output.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)
# Start serving the interface.
iface.launch()
42
+