RinInori committed on
Commit
259cdf8
1 Parent(s): 1840778

Upload app_v0.1.py


Chatbot app without my fine-tuning dataset

Files changed (1)
  1. app_v0.1.py +55 -0
app_v0.1.py ADDED
@@ -0,0 +1,55 @@
+ import torch
+ from peft import PeftModel
+ import transformers
+ import gradio as gr
+ from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
+ from transformers import Trainer
+
+ BASE_MODEL = "TheBloke/vicuna-7B-1.1-HF"
+
+ model = LlamaForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.float16,
+     load_in_8bit=True,  # quantize weights to 8-bit (requires bitsandbytes)
+     device_map="auto",  # let accelerate place layers across available devices
+     offload_folder="./cache",
+ )
+
+ tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
+ tokenizer.pad_token_id = 0  # pad with the unk token so padding differs from eos
+ tokenizer.padding_side = "left"
+
+ def format_prompt(prompt: str) -> str:
+     return f"### Human: {prompt}\n### Assistant:"
+
+ generation_config = GenerationConfig(
+     max_new_tokens=128,
+     temperature=0.2,
+     repetition_penalty=1.0,
+ )
+
+ def generate_text(prompt: str):
+     formatted_prompt = format_prompt(prompt)
+
+     inputs = tokenizer(
+         formatted_prompt,
+         padding=False,
+         add_special_tokens=False,
+         return_tensors="pt",
+     ).to(model.device)
+
+     with torch.inference_mode():
+         tokens = model.generate(**inputs, generation_config=generation_config)
+
+     # Decode the full sequence, then keep only the text after the Assistant marker
+     response = tokenizer.decode(tokens[0], skip_special_tokens=True)
+     assistant_index = response.find("### Assistant:") + len("### Assistant:")
+     return response[assistant_index:].strip()
+
+ iface = gr.Interface(
+     fn=generate_text,
+     inputs="text",
+     outputs="text",
+     title="Chatbot",
+     description="This Vicuna app uses this model: https://huggingface.co/TheBloke/vicuna-7B-1.1-HF",
+ )
+ iface.launch()
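
Once the script is running (e.g. with "python app_v0.1.py"), the Gradio interface can also be queried programmatically. A minimal sketch, assuming the app is reachable on Gradio's default local port 7860 and that the gradio_client package is installed; "/predict" is Gradio's default endpoint name for a single-function Interface:

from gradio_client import Client

# Connect to the locally running Gradio app (adjust the URL as needed)
client = Client("http://127.0.0.1:7860/")

# Send a prompt to generate_text and print the model's reply
reply = client.predict("What is the capital of France?", api_name="/predict")
print(reply)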