ndn1954 committed on
Commit
994cb8e
·
1 Parent(s): add5f12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +457 -284
app.py CHANGED
@@ -1,287 +1,460 @@
1
- # app.py
2
- from typing import List, Union, Optional
3
-
4
- from dotenv import load_dotenv, find_dotenv
5
- from langchain.callbacks import get_openai_callback
6
- from langchain.chat_models import ChatOpenAI
7
- from langchain.embeddings.openai import OpenAIEmbeddings
8
- from langchain.schema import (SystemMessage, HumanMessage, AIMessage)
9
- from langchain.llms import LlamaCpp
10
- from langchain.embeddings import LlamaCppEmbeddings
11
- from langchain.callbacks.manager import CallbackManager
12
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
13
- from langchain.text_splitter import TokenTextSplitter
14
- from langchain.prompts import PromptTemplate
15
- from langchain.vectorstores import Qdrant
16
- from PyPDF2 import PdfReader
17
- import streamlit as st
18
-
19
- # Use a pipeline as a high-level helper
20
- #from transformers import pipeline
21
- #pipe = pipeline("text-generation", model="TheBloke/Llama-2-7B-Chat-GGML")
22
- # Load model directly
23
- #from transformers import AutoModel
24
- #model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML")
25
-
26
- from transformers import AutoModelForCausalLM, AutoTokenizer
27
- model_id="TheBloke/Llama-2-7B-Chat-GGML"
28
- tokenizer=AutoTokenizer.from_pretrained(model_id)
29
- model=AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
30
-
31
- PROMPT_TEMPLATE = """
32
- Use the following pieces of context enclosed by triple backquotes to answer the question at the end.
33
- \n\n
34
- Context:
35
- ```
36
- {context}
37
- ```
38
- \n\n
39
- Question: [][][][]{question}[][][][]
40
- \n
41
- Answer:"""
42
-
43
-
44
- def init_page() -> None:
45
- st.set_page_config(
46
- page_title="Personal ChatGPT"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  )
48
- st.sidebar.title("Options")
49
-
50
-
51
- def init_messages() -> None:
52
- clear_button = st.sidebar.button("Clear Conversation", key="clear")
53
- if clear_button or "messages" not in st.session_state:
54
- st.session_state.messages = [
55
- SystemMessage(
56
- content=(
57
- "You are a helpful AI QA assistant. "
58
- "When answering questions, use the context enclosed by triple backquotes if it is relevant. "
59
- "If you don't know the answer, just say that you don't know, "
60
- "don't try to make up an answer. "
61
- "Reply your answer in mardkown format.")
62
- )
63
- ]
64
- st.session_state.costs = []
65
-
66
-
67
- def get_pdf_text() -> Optional[str]:
68
- """
69
- Function to load PDF text and split it into chunks.
70
- """
71
- st.header("Document Upload")
72
- uploaded_file = st.file_uploader(
73
- label="Here, upload your PDF file you want ChatGPT to use to answer",
74
- type="pdf"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
- if uploaded_file:
77
- pdf_reader = PdfReader(uploaded_file)
78
- text = "\n\n".join([page.extract_text() for page in pdf_reader.pages])
79
- text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=0)
80
- return text_splitter.split_text(text)
81
- else:
82
- return None
83
-
84
-
85
- def build_vectore_store(
86
- texts: str, embeddings: Union[OpenAIEmbeddings, LlamaCppEmbeddings]) \
87
- -> Optional[Qdrant]:
88
- """
89
- Store the embedding vectors of text chunks into vector store (Qdrant).
90
- """
91
- if texts:
92
- with st.spinner("Loading PDF ..."):
93
- qdrant = Qdrant.from_texts(
94
- texts,
95
- embeddings,
96
- path=":memory:",
97
- collection_name="my_collection",
98
- force_recreate=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
- st.success("File Loaded Successfully!!")
101
- else:
102
- qdrant = None
103
- return qdrant
104
-
105
-
106
- def select_llm() -> Union[ChatOpenAI, LlamaCpp]:
107
- """
108
- Read user selection of parameters in Streamlit sidebar.
109
- """
110
- model_name = st.sidebar.radio("Choose LLM:",
111
- ("llama-2-7b-chat.ggmlv3.q2_K",
112
- "gpt-3.5-turbo-0613",
113
- "gpt-3.5-turbo-16k-0613",
114
- "gpt-4"))
115
- temperature = st.sidebar.slider("Temperature:", min_value=0.0,
116
- max_value=1.0, value=0.0, step=0.01)
117
- return model_name, temperature
118
-
119
-
120
- def load_llm(model_name: str, temperature: float) -> Union[ChatOpenAI, LlamaCpp]:
121
- """
122
- Load LLM.
123
- """
124
- if model_name.startswith("gpt-"):
125
- return ChatOpenAI(temperature=temperature, model_name=model_name)
126
- elif model_name.startswith("llama-2-"):
127
- callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
128
-
129
- '''return LlamaCpp(
130
- model_path=f"./models/{model_name}.bin",
131
- input={"temperature": temperature,
132
- "max_length": 2048,
133
- "top_p": 1
134
- },
135
- n_ctx=2048,
136
- callback_manager=callback_manager,
137
- verbose=False, # True
138
- )'''
139
- return model
140
-
141
-
142
- def load_embeddings(model_name: str) -> Union[OpenAIEmbeddings, LlamaCppEmbeddings]:
143
- """
144
- Load embedding model.
145
- """
146
- if model_name.startswith("gpt-"):
147
- return OpenAIEmbeddings()
148
- elif model_name.startswith("llama-2-"):
149
- return LlamaCppEmbeddings(model_path=f"./models/{model_name}.bin")
150
-
151
-
152
- def get_answer(llm, messages) -> tuple[str, float]:
153
- """
154
- Get the AI answer to user questions.
155
- """
156
- if isinstance(llm, ChatOpenAI):
157
- with get_openai_callback() as cb:
158
- answer = llm(messages)
159
- return answer.content, cb.total_cost
160
- if isinstance(llm, LlamaCpp):
161
- return llm(llama_v2_prompt(convert_langchainschema_to_dict(messages))), 0.0
162
-
163
-
164
- def find_role(message: Union[SystemMessage, HumanMessage, AIMessage]) -> str:
165
- """
166
- Identify role name from langchain.schema object.
167
- """
168
- if isinstance(message, SystemMessage):
169
- return "system"
170
- if isinstance(message, HumanMessage):
171
- return "user"
172
- if isinstance(message, AIMessage):
173
- return "assistant"
174
- raise TypeError("Unknown message type.")
175
-
176
-
177
- def convert_langchainschema_to_dict(
178
- messages: List[Union[SystemMessage, HumanMessage, AIMessage]]) \
179
- -> List[dict]:
180
- """
181
- Convert the chain of chat messages in list of langchain.schema format to
182
- list of dictionary format.
183
- """
184
- return [{"role": find_role(message),
185
- "content": message.content
186
- } for message in messages]
187
-
188
-
189
- def llama_v2_prompt(messages: List[dict]) -> str:
190
- """
191
- Convert the messages in list of dictionary format to Llama2 compliant
192
- format.
193
- """
194
- B_INST, E_INST = "[INST]", "[/INST]"
195
- B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
196
- BOS, EOS = "<s>", "</s>"
197
- DEFAULT_SYSTEM_PROMPT = f"""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
198
-
199
- if messages[0]["role"] != "system":
200
- messages = [
201
- {
202
- "role": "system",
203
- "content": DEFAULT_SYSTEM_PROMPT,
204
- }
205
- ] + messages
206
- messages = [
207
- {
208
- "role": messages[1]["role"],
209
- "content": B_SYS + messages[0]["content"] + E_SYS + messages[1]["content"],
210
- }
211
- ] + messages[2:]
212
-
213
- messages_list = [
214
- f"{BOS}{B_INST} {(prompt['content']).strip()} {E_INST} {(answer['content']).strip()} {EOS}"
215
- for prompt, answer in zip(messages[::2], messages[1::2])
216
- ]
217
- messages_list.append(
218
- f"{BOS}{B_INST} {(messages[-1]['content']).strip()} {E_INST}")
219
-
220
- return "".join(messages_list)
221
-
222
-
223
- def extract_userquesion_part_only(content):
224
- """
225
- Function to extract only the user question part from the entire question
226
- content combining user question and pdf context.
227
- """
228
- content_split = content.split("[][][][]")
229
- if len(content_split) == 3:
230
- return content_split[1]
231
- return content
232
-
233
-
234
- def main() -> None:
235
- _ = load_dotenv(find_dotenv())
236
-
237
- init_page()
238
-
239
- model_name, temperature = select_llm()
240
- llm = load_llm(model_name, temperature)
241
- embeddings = load_embeddings(model_name)
242
-
243
- texts = get_pdf_text()
244
- qdrant = build_vectore_store(texts, embeddings)
245
-
246
- init_messages()
247
-
248
- st.header("Personal ChatGPT")
249
- # Supervise user input
250
- if user_input := st.chat_input("Input your question!"):
251
- if qdrant:
252
- context = [c.page_content for c in qdrant.similarity_search(
253
- user_input, k=10)]
254
- user_input_w_context = PromptTemplate(
255
- template=PROMPT_TEMPLATE,
256
- input_variables=["context", "question"]) \
257
- .format(
258
- context=context, question=user_input)
259
- else:
260
- user_input_w_context = user_input
261
- st.session_state.messages.append(
262
- HumanMessage(content=user_input_w_context))
263
- with st.spinner("ChatGPT is typing ..."):
264
- answer, cost = get_answer(llm, st.session_state.messages)
265
- st.session_state.messages.append(AIMessage(content=answer))
266
- st.session_state.costs.append(cost)
267
-
268
- # Display chat history
269
- messages = st.session_state.get("messages", [])
270
- for message in messages:
271
- if isinstance(message, AIMessage):
272
- with st.chat_message("assistant"):
273
- st.markdown(message.content)
274
- elif isinstance(message, HumanMessage):
275
- with st.chat_message("user"):
276
- st.markdown(extract_userquesion_part_only(message.content))
277
-
278
- costs = st.session_state.get("costs", [])
279
- st.sidebar.markdown("## Costs")
280
- st.sidebar.markdown(f"**Total cost: ${sum(costs):.5f}**")
281
- for cost in costs:
282
- st.sidebar.markdown(f"- ${cost:.5f}")
283
-
284
-
285
- # streamlit run app.py
286
- if __name__ == "__main__":
287
- main()
 
1
+ """Run codes."""
2
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
3
+ # ruff: noqa: E501
4
+ import gc
5
+ import os
6
+ import platform
7
+ import random
8
+ import time
9
+ from dataclasses import asdict, dataclass
10
+ from pathlib import Path
11
+
12
+ # from types import SimpleNamespace
13
+ import gradio as gr
14
+ import psutil
15
+ from about_time import about_time
16
+ from ctransformers import AutoModelForCausalLM
17
+ from dl_hf_model import dl_hf_model
18
+ from loguru import logger
19
+
20
# Quantised builds published in TheBloke/Wizard-Vicuna-7B-Uncensored-GGML.
# Nothing else in this file reads the list; it is kept as a lookup table.
filename_list = [
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
]

# Not read anywhere in this file.
# NOTE(review): a "/raw/main/" link to an LFS-tracked file presumably serves
# the pointer stub rather than the weights -- confirm before reusing it.
URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G

# Models tried earlier (each assignment was immediately overwritten):
#   https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin
#   https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin  (7.37G)
#   https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin  (6.93G)
#   https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin  (2.87G)

# Default for hosts that are not matched by the check below: 13B chat model.
url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G

# Hosts that should fall back to the 7B model.
# NOTE: the trailing `or 1` makes this expression always truthy, so the 13B
# default above is currently never used.
_ = (
    "golay" in platform.node()
    or "okteto" in platform.node()
    or Path("/kaggle").exists()
    # or psutil.cpu_count(logical=False) < 4
    or 1  # run 7b in hf
)

if _:
    # The "2.87G" figure previously attached to this line belongs to the q2_K
    # build; q4_K_M is the larger file.
    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # ~4.08G
59
+
60
+
61
# Llama-2 chat prompt. `generate()` fills the single `{question}` placeholder.
# Earlier experiments (Alpaca-style "### Instruction", "System/User/Assistant",
# "Question/Answer" and other [INST] variants) were assigned here and then
# immediately overwritten; several used placeholders other than `{question}`
# and could not have been formatted by `generate()`, so only the template that
# is actually in effect is kept.
prompt_template = """[INST] <<SYS>>
You are a helpful assistant.
<</SYS>>
{question} [/INST]
"""

# Take the text before the first ":" on the second-to-last non-blank template
# line and append ":".
# NOTE(review): this looks designed for "User: ... / Assistant: ..." templates;
# for the [INST] template above it evaluates to "<</SYS>>:". The value is only
# logged here -- the `stop` option in GenerationConfig is commented out.
_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]

logger.debug(f"{stop_string=}")
108
+
109
# Leave one physical core free, but never go below one worker thread.
# psutil.cpu_count(logical=False) returns None when the number of physical
# cores cannot be determined; subtracting from None would raise TypeError,
# so fall back to 1 first.
_ = psutil.cpu_count(logical=False) or 1
cpu_count: int = max(int(_) - 1, 1)
logger.debug(f"{cpu_count=}")
112
+
113
# Clear the module-level model handle and force a garbage-collection pass
# before loading. NOTE(review): presumably this only matters when the module
# is re-executed with a model already in memory -- confirm.
LLM = None
gc.collect()

# Fetch the model file selected by `url`. dl_hf_model returns a pair that is
# unpacked into the local model location and a size figure (logged below with
# a "G" suffix). Any failure is logged and turned into exit status 1, because
# nothing further in this module can work without the model.
try:
    model_loc, file_size = dl_hf_model(url)
except Exception as exc_:
    logger.error(exc_)
    raise SystemExit(1) from exc_

# Load the downloaded file through ctransformers as a "llama"-type model.
LLM = AutoModelForCausalLM.from_pretrained(
    model_loc,
    model_type="llama",
    # threads=cpu_count,
)

logger.info(f"done load llm {model_loc=} {file_size=}G")
129
+
130
# Switch the process to Asia/Shanghai local time.
os.environ["TZ"] = "Asia/Shanghai"
# time.tzset() only exists on Unix. Check for it explicitly instead of using a
# blanket `except Exception`, which would also hide unrelated failures.
if hasattr(time, "tzset"):
    time.tzset()  # type: ignore # pylint: disable=no-member
else:
    # Windows
    logger.warning("Windows, cant run time.tzset()")
136
+
137
+ _ = """
138
+ ns = SimpleNamespace(
139
+ response="",
140
+ generator=(_ for _ in []),
141
+ )
142
+ # """
143
+
144
@dataclass
class GenerationConfig:
    """Keyword arguments handed to the model on every call.

    `generate()` expands `asdict(config)` directly into `llm(prompt, **kwargs)`,
    so each field name below must be a keyword the loaded model accepts.
    """

    temperature: float = 0.7
    top_k: int = 50
    top_p: float = 0.9
    repetition_penalty: float = 1.0
    max_new_tokens: int = 512
    seed: int = 42
    # Both callers in this file (`bot` and `predict_api`) pass reset=True.
    reset: bool = False
    # `bot` iterates over the result (stream=True); `predict_api` passes
    # stream=False and returns the result as-is.
    stream: bool = True
    # threads: int = cpu_count
    # stop: list[str] = field(default_factory=lambda: [stop_string])
156
+
157
+
158
def generate(
    question: str,
    llm=None,
    config: "GenerationConfig | None" = None,
):
    """Run model inference on *question*.

    Args:
        question: User text substituted for ``{question}`` in the module-level
            ``prompt_template``.
        llm: Callable model. Defaults to the module-level ``LLM``, looked up at
            call time so a model reloaded after import is picked up.
        config: Generation options. A fresh ``GenerationConfig()`` is created
            when omitted, so calls never share one default instance.

    Returns:
        Whatever ``llm(prompt, **options)`` returns. Callers in this file
        iterate over it when ``config.stream`` is true and use it directly
        otherwise.
    """
    if llm is None:
        llm = LLM
    if config is None:
        config = GenerationConfig()

    prompt = prompt_template.format(question=question)

    return llm(
        prompt,
        **asdict(config),
    )
173
+
174
+
175
+ logger.debug(f"{asdict(GenerationConfig())=}")
176
+
177
+
178
def user(user_message, history):
    """Append a pending turn for *user_message* and keep the textbox content.

    Args:
        user_message: Text taken from the message box.
        history: Chat history as a list of ``[user, bot]`` pairs; ``None`` is
            treated as an empty history.

    Returns:
        ``(user_message, new_history)`` where ``new_history`` is a new list
        ending with ``[user_message, None]``. The list passed in is not
        modified.
    """
    # Build a new list rather than appending in place, so the caller's object
    # is never mutated behind its back.
    new_history = list(history or [])
    new_history.append([user_message, None])
    return user_message, new_history  # keep user_message
182
+
183
+
184
def user1(user_message, history):
    """Append a pending turn for *user_message* and clear the textbox.

    Args:
        user_message: Text taken from the message box.
        history: Chat history as a list of ``[user, bot]`` pairs; ``None`` is
            treated as an empty history.

    Returns:
        ``("", new_history)`` where ``new_history`` is a new list ending with
        ``[user_message, None]``. The list passed in is not modified.
    """
    # Build a new list rather than appending in place, so the caller's object
    # is never mutated behind its back.
    new_history = list(history or [])
    new_history.append([user_message, None])
    return "", new_history  # clear user_message
188
+
189
+
190
def bot_(history):
    """Placeholder bot that streams a canned reply without calling the model.

    Picks one of three fixed replies, reveals "<user message>: <reply>" one
    character at a time in the last history entry (yielding the history after
    each character), then replaces the entry with the bare reply and yields
    once more.
    """
    canned_replies = ["How are you?", "I love you", "I'm very hungry"]

    question = history[-1][0]
    resp = random.choice(canned_replies)
    echoed = question + ": " + resp

    history[-1][1] = ""
    for shown in range(1, len(echoed) + 1):
        # Reveal one more character per step to imitate streaming output.
        history[-1][1] = echoed[:shown]
        time.sleep(0.02)
        yield history

    history[-1][1] = resp
    yield history
202
+
203
+
204
def bot(history):
    """Stream the model's answer to the last user message into *history*.

    The last entry of *history* is a ``[user_message, reply]`` pair. While the
    model streams, the reply slot holds "(<seconds to first chunk>s) " followed
    by the text received so far, and *history* is yielded after every chunk.
    When streaming ends, the reply is replaced by the full text plus a timing
    summary line, and *history* is yielded one last time.
    """
    user_message = history[-1][0]
    text = ""

    logger.debug(f"{user_message=}")

    with about_time() as atime:  # type: ignore
        prefix = ""
        then = time.time()

        logger.debug("about to generate")

        config = GenerationConfig(reset=True)
        for idx, elm in enumerate(generate(user_message, config=config)):
            if idx == 0:
                # First chunk: record how long the model took to start answering.
                logger.debug("in the loop")
                prefix = f"({time.time() - then:.2f}s) "
                print(prefix, end="", flush=True)
                logger.debug(f"{prefix=}")
            print(elm, end="", flush=True)

            # Extend the running text instead of re-joining every chunk each time.
            text += elm
            history[-1][1] = prefix + text
            yield history

    # A model that yields no text would otherwise cause a division by zero.
    if text:
        rate = f"{atime.duration / len(text):.2f}s/char"  # type: ignore
    else:
        rate = "no output"
    summary = f"(time elapsed: {atime.duration_human}, {rate})"  # type: ignore

    history[-1][1] = text + f"\n{summary}"
    yield history
239
+
240
+
241
def predict_api(prompt):
    """Answer *prompt* in one non-streaming call (backs the "api" endpoint).

    Returns the value produced by `generate`; if anything raises, the error is
    logged and a string describing the exception is returned instead.
    """
    logger.debug(f"{prompt=}")

    api_options = {
        "temperature": 0.2,
        "top_k": 10,
        "top_p": 0.9,
        "repetition_penalty": 1.0,
        "max_new_tokens": 512,  # adjust as needed
        "seed": 42,
        "reset": True,  # reset history (cache)
        "stream": False,
    }

    try:
        response = generate(prompt, config=GenerationConfig(**api_options))
        logger.debug(f"api: {response=}")
    except Exception as exc:
        logger.error(exc)
        return f"{exc=}"

    return response
271
+
272
+
273
+ css = """
274
+ .importantButton {
275
+ background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
276
+ border: none !important;
277
+ }
278
+ .importantButton:hover {
279
+ background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
280
+ border: none !important;
281
+ }
282
+ .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
283
+ .xsmall {font-size: x-small;}
284
+ """
285
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
286
+ examples_list = [
287
+ ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
288
+ [
289
+ "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
290
+ ],
291
+ ["How to pick a lock? Provide detailed steps."],
292
+ ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying , then how long will it take to dry a cloth?"],
293
+ ["is infinity + 1 bigger than infinity?"],
294
+ ["Explain the plot of Cinderella in a sentence."],
295
+ [
296
+ "How long does it take to become proficient in French, and what are the best methods for retaining information?"
297
+ ],
298
+ ["What are some common mistakes to avoid when writing code?"],
299
+ ["Build a prompt to generate a beautiful portrait of a horse"],
300
+ ["Suggest four metaphors to describe the benefits of AI"],
301
+ ["Write a pop song about leaving home for the sandy beaches."],
302
+ ["Write a summary demonstrating my ability to tame lions"],
303
+ ["鲁迅和周树人什么关系? 说中文。"],
304
+ ["鲁迅和周树人什么关系?"],
305
+ ["鲁迅和周树人什么关系? 用英文回答。"],
306
+ ["从前有一头牛,这头牛后面有什么?"],
307
+ ["正无穷大加一大于正无穷大吗?"],
308
+ ["正无穷大加正无穷大大于正无穷大吗?"],
309
+ ["-2的平方根等于什么?"],
310
+ ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
311
+ ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
312
+ ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
313
+ [f"{etext} 翻成中文,列出3个版本。"],
314
+ [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本。"],
315
+ ["假定 1 + 2 = 4, 试求 7 + 8。"],
316
+ ["给出判断一个数是不是质数的 javascript 码。"],
317
+ ["给出实现python 里 range(10)的 javascript 码。"],
318
+ ["给出实现python 里 [*(range(10)]的 javascript 码。"],
319
+ ["Erkläre die Handlung von Cinderella in einem Satz."],
320
+ ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch."],
321
+ ]
322
+
323
logger.info("start block")

# File name of the loaded model; shown in the page title, the info panel and
# the disclaimer.
model_name = Path(model_loc).name

with gr.Blocks(
    title=f"{model_name}",
    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
    css=css,
) as block:
    with gr.Accordion("🎈 Info", open=False):
        # The heading used to open with <h5> and close with </h4>.
        gr.Markdown(
            f"<h5><center>{model_name}</center></h5>\n"
            "Most examples are meant for another model.\n"
            "You probably should try to test\n"
            "some related prompts.",
            elem_classes="xsmall",
        )

    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        with gr.Column(scale=5):
            msg = gr.Textbox(
                label="Chat Message Box",
                placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
                show_label=False,
                # container=False,
                lines=6,
                max_lines=30,
                show_copy_button=True,
            )
        with gr.Column(scale=1, min_width=50):
            with gr.Row():
                submit = gr.Button("Submit", elem_classes="xsmall")
                stop = gr.Button("Stop", visible=True)
                clear = gr.Button("Clear History", visible=True)

    # Hidden row: these widgets are created but no event handlers are attached.
    with gr.Row(visible=False):
        with gr.Accordion("Advanced Options:", open=False):
            with gr.Row():
                with gr.Column(scale=2):
                    system = gr.Textbox(
                        label="System Prompt",
                        value=prompt_template,
                        show_label=False,
                        container=False,
                    )
                with gr.Column():
                    with gr.Row():
                        change = gr.Button("Change System Prompt")
                        reset = gr.Button("Reset System Prompt")

    with gr.Accordion("Example Inputs", open=True):
        examples = gr.Examples(
            examples=examples_list,
            inputs=[msg],
            examples_per_page=40,
        )

    with gr.Accordion("Disclaimer", open=False):
        # Every segment that interpolates the model name needs the f prefix;
        # the second one used to lack it and rendered a literal "{_}".
        gr.Markdown(
            f"Disclaimer: {model_name} can produce factually incorrect output, and should not be relied on to produce "
            f"factually accurate information. {model_name} was trained on various public datasets; while great efforts "
            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
            "biased, or otherwise offensive outputs.",
            elem_classes=["disclaimer"],
        )

    # Submitting from the textbox keeps the typed text (`user`); clicking
    # Submit clears it (`user1`). Both then stream the answer via `bot`.
    msg_submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=True,
        show_progress="full",
        # api_name=None,
    ).then(bot, chatbot, chatbot, queue=True)
    submit_click_event = submit.click(
        fn=user1,  # clear msg
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=True,
        show_progress="full",
        # api_name=None,
    ).then(bot, chatbot, chatbot, queue=True)
    # Stop cancels whichever of the two event chains is running.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[msg_submit_event, submit_click_event],
        queue=False,
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Invisible widgets whose only purpose is to expose `predict_api` under
    # the endpoint name "api".
    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
        input_text = gr.Text()
        api_btn = gr.Button("Go", variant="primary")
        out_text = gr.Text()

    api_btn.click(
        predict_api,
        input_text,
        out_text,
        api_name="api",
    )

# Sizing notes carried over from the original:
#   CPU          cpu_count=2, 16G RAM, model 7G
#   CPU UPGRADE  cpu_count=8, 32G RAM, model 7G
# Deriving the concurrency from available memory / file_size was tried and
# marked "does not work", so a single worker is used.
concurrency_count = 1
logger.info(f"{concurrency_count=}")

block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)