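"""Minimal Gradio text UI around a local Nous-Hermes-13B GGML model,
served through LangChain's LlamaCpp wrapper."""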
import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
# Optional extras, unused in the active code path:
# from langchain.llms import GPT4All
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# One-time download of the GGML weights (uncomment to fetch):
# import requests
# url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"
# response = requests.get(url)
# with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
#     f.write(response.content)
print("DONE")
def func(user):
    # Chain-of-thought style prompt: nudge the model to reason step by step.
    template = """Question: {question}

Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])

    local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"

    # Callbacks support token-wise streaming to stdout; verbose is required
    # to pass to the callback manager:
    # callbacks = [StreamingStdOutCallbackHandler()]
    # llm = LlamaCpp(model_path=local_path, n_ctx=2048, callbacks=callbacks, verbose=True)
    llm = LlamaCpp(model_path=local_path, n_ctx=2048)
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    # Run the chain once and return the model's answer.
    return llm_chain.run(user)
iface = gr.Interface(fn=func, inputs="text", outputs="text")
iface.launch()
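# Note: launch(share=True) (a standard Gradio option) would additionally
# expose a temporary public URL, e.g.:
# iface.launch(share=True)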