AdithyaSK commited on
Commit
08cff1d
·
1 Parent(s): 06e4ab7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gpt4all import GPT4All
3
+ from huggingface_hub import hf_hub_download
4
+ import os
5
+
6
# Absolute path of the "models" subfolder under the process working
# directory; used below to decide whether the weights must be downloaded.
current_directory = os.getcwd()
model_directory = os.path.join(current_directory, "models")
8
+
9
# Title shown on the ChatInterface.
title = "TaoScience"

# HTML blurb rendered above the chat box (links to the project repo).
description = """
<h1><center>LLM Finetuned on TaoScience<center></h1>
<h3><center>TaoGPT is a fine-tuned LLM on Tao Science by Dr. Rulin Xu and Dr. Zhi Gang Sha. <br> Check out- <a href='https://github.com/agencyxr/taogpt7B'>Github Repo</a> For More Information. 💬<h3><center>

"""

# Embedded Nomic Atlas map of the fine-tuning dataset, rendered via gr.HTML
# inside the "Visualise Training Data" accordion.
NOMIC = """
<!DOCTYPE html>
<html>
<head>
<title>TaoGPT - DataMap</title>
<style>
iframe {
width: 100%;
height: 600px; /* You can adjust the height as needed */
border: 0;
}
</style>
</head>
<body>
<iframe
src="https://atlas.nomic.ai/map/c1ce06f4-7ed0-4c02-88a4-dd3b47bdf878/f2941fb8-0f36-4a23-8cbe-40dbf76ca9e4?xs=-41.09135&xf=41.12038&ys=-22.50394&yf=23.67273"
></iframe>
</body>
</html>
"""
37
+
38
# Where the quantized weights live (relative path) and the exact GGUF file
# to fetch from the Hub on first run.
model_path = "models"
model_name = "taogpt-v1-gguf.Q5_K_M.gguf"
# os.path.isdir() already implies existence, so the original
# `exists(...) and isdir(...)` double check is redundant.
if os.path.isdir(model_directory):
    # Fixed message typo: "exits" -> "exists".
    print("Models folder already exists")
else:
    # First run: download the weights into ./models. NOTE(review): this only
    # checks for the folder, not the file itself — a present-but-empty folder
    # would skip the download; preserved as-is to avoid a behavior change.
    hf_hub_download(
        repo_id="agency888/TaoGPT-v1-GGUF-GGUF",
        filename=model_name,
        local_dir=model_path,
        local_dir_use_symlinks=False,
    )
44
+
45
+
46
print("Start the model init process")
# Load the local GGUF weights on CPU; allow_download=False guarantees no
# network access here (the file was fetched above via hf_hub_download).
# BUG FIX: the original had a duplicated assignment `model = model = GPT4All(...)`.
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
print("Finish the model init process")

# Raw completion-style template: just the message followed by a newline,
# no chat markup.
model.config["promptTemplate"] = """{0}
"""

model.config["systemPrompt"] = "In the Context of TaoScience answer this questions: "
# Disable GPT4All's built-in chat-session handling; prompts are assembled
# manually in generator() below.
model._is_chat_session_activated = False

# Hard cap on tokens generated per response.
max_new_tokens = 2048
57
+
58
def generator(message, history, temperature, top_p, top_k):
    """Stream a completion for *message*, yielding the accumulated text.

    *history* is a list of (user, assistant) pairs; only the user turns are
    folded into the prompt (assistant turns are intentionally ignored, as in
    the original implementation).
    """
    past_user_turns = [user_turn for user_turn, _ in history]
    prompt = "".join(
        model.config["promptTemplate"].format(turn)
        for turn in past_user_turns + [message]
    )
    collected = []
    token_stream = model.generate(
        prompt=prompt,
        temp=temperature,
        top_k=top_k,
        top_p=top_p,
        max_tokens=max_new_tokens,
        streaming=True,
    )
    for piece in token_stream:
        collected.append(piece)
        # Yield the full text so far so the UI shows a growing response.
        yield "".join(collected)
68
+
69
def vote(data: gr.LikeData):
    """Like/dislike callback for the chatbot; currently a no-op.

    Both branches deliberately discard the feedback (placeholder for future
    analytics).
    """
    return None if data.liked else None
74
+
75
# Chat display widget; bubbles sized to their content rather than full width.
chatbot = gr.Chatbot(bubble_full_width=False)

# Sampling controls shown under the chat box, built from a spec table so the
# three sliders are declared uniformly.
_slider_specs = [
    dict(
        label="temperature",
        value=0.2,
        minimum=0.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
    ),
    dict(
        label="top_p",
        value=1.0,
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        interactive=True,
        info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it",
    ),
    dict(
        label="top_k",
        value=40,
        minimum=0,
        maximum=1000,
        step=1,
        interactive=True,
        info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
    ),
]
additional_inputs = [gr.Slider(**spec) for spec in _slider_specs]
106
+
107
# ---- UI layout -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.HTML("<h1><center>TaoGPTv0<center></h1>")
    gr.HTML("<h3><center>TaoGPTv0 is a fine-tuned Mistal-7B model with a retrieval augmented generation pipeline on Tao Science by Dr. Rulin Xu and Dr. Zhi Gang Sha. Check out- <a href='https://github.com/agencyxr/taogpt7B'>Github Repo</a> For More Information. 💬<h3><center>")
    with gr.Column():
        # BUG FIX: the accordion label was an unquoted bare expression
        # (`gr.Accordion(Visualise Training Data)`) — a SyntaxError.
        with gr.Accordion("Visualise Training Data"):
            gr.HTML("<h3>Look into the dataset we used to finetune our model</h3>")
            gr.HTML(NOMIC)
    with gr.Column():
        gr.ChatInterface(
            fn=generator,
            title=title,
            description=description,
            chatbot=chatbot,
            additional_inputs=additional_inputs,
            examples=[
                ["What is TaoScience ?"],
                ["TaoScience was written by ?"],
                ["Tell me more about TaoScience"],
            ],
        )
        # Typo fix in the label: "Retrival" -> "Retrieval". Disabled toggle:
        # RAG is always on in this version.
        RAG_Checkbox = gr.Checkbox(label="Use Retrieval Augmented Generation", value=True, interactive=False)
        # Typo fix: "many not" -> "may not".
        gr.Markdown("The model is prone to Hallucination and may not always be Factual")

if __name__ == "__main__":
    # Queue requests (bounded) so concurrent users don't contend for the
    # single CPU-bound model instance.
    demo.queue(max_size=50).launch(share=True)