# llm / app.py
# Last commit: "Update app.py" (18222e8) by tommy24 -- file size 2.5 kB
# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.llms import GPT4All
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# # import requests
# # url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"
# # response = requests.get(url)
# # with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
# # f.write(response.content)
# print("DONE")
# def func(user):
# template = """
# Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
# Question: {question}
# Answer: """
# prompt = PromptTemplate(template=template, input_variables=["question"])
# local_path = (
# "./nous-hermes-13b.ggmlv3.q4_0.bin"
# )
# # # Callbacks support token-wise streaming
# # callbacks = [StreamingStdOutCallbackHandler()]
# # Verbose is required to pass to the callback manager
# llm = LlamaCpp(model_path="./nous-hermes-13b.ggmlv3.q4_0.bin", n_ctx=2048)
# llm_chain = LLMChain(prompt=prompt, llm=llm)
# question = user
# llm_chain.run(question)
# return llm_chain.run(question)
# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()
import threading

import gradio as gr
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
# Startup marker written to stdout when this module is executed, before the
# Gradio interface below is created.
print("DONE")
# Prompt sent to the model; ``{question}`` is replaced with the user's message.
_PROMPT_TEMPLATE = """
Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
Question: {question}
Answer: """

# Location of the GGML model weights, relative to the working directory.
_MODEL_PATH = "./nous-hermes-13b.ggmlv3.q4_0.bin"

# The chain is built lazily on the first request and then reused, so the
# model weights are loaded once instead of on every call.
_chain = None
# Serializes both the lazy construction and every inference call, because all
# requests share the single model instance wrapped by ``_chain``.
_chain_lock = threading.Lock()


def _get_chain():
    """Return the shared ``LLMChain``, building it on first use.

    The caller must hold ``_chain_lock``. If construction raises (for example
    because the model file is missing), nothing is cached and the next call
    tries again.
    """
    global _chain
    if _chain is None:
        prompt = PromptTemplate(
            template=_PROMPT_TEMPLATE,
            input_variables=["question"],
        )
        # Streaming is configured on the LlamaCpp model; LLMChain does not
        # declare a ``streaming`` field, so the flag must not be passed there.
        llm = LlamaCpp(model_path=_MODEL_PATH, streaming=True)
        _chain = LLMChain(prompt=prompt, llm=llm)
    return _chain


def func(user):
    """Answer a single user message with the local llama.cpp model.

    Args:
        user: The text entered by the user; it is substituted for
            ``{question}`` in the prompt template.

    Returns:
        The text produced by the chain for this message.
    """
    with _chain_lock:
        # Run the chain exactly once per request and return that result.
        return _get_chain().run(user)
# Expose `func` through a one-input, one-output text UI and start serving it.
iface = gr.Interface(
    fn=func,
    inputs="text",
    outputs="text",
)
iface.launch()