import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# The GGML weights can be fetched once ahead of time, e.g.:
#
# import requests
#
# url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"
# response = requests.get(url)
#
# with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
#     f.write(response.content)
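
# An alternative download sketch (assumption: the huggingface_hub package is
# installed; it is not imported by the script above). hf_hub_download streams
# the file to disk and caches it, avoiding holding the multi-GB file in
# memory the way response.content does:
#
# from huggingface_hub import hf_hub_download
#
# hf_hub_download(
#     repo_id="TheBloke/Nous-Hermes-13B-GGML",
#     filename="nous-hermes-13b.ggmlv3.q4_0.bin",
#     local_dir=".",
# )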


print("DONE")

def func(user):
        
    template = """Question: {question}
    
    Answer: Let's think step by step."""
    
    prompt = PromptTemplate(template=template, input_variables=["question"])
    
    local_path = (
        "./nous-hermes-13b.ggmlv3.q4_0.bin"
    )

    
    # # Callbacks support token-wise streaming
    # callbacks = [StreamingStdOutCallbackHandler()]
    
    # Verbose is required to pass to the callback manager
    llm = LlamaCpp(model_path="./nous-hermes-13b.ggmlv3.q4_0.bin", n_ctx=2048)
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    question = user
    llm_chain.run(question)

    return llm_chain.run(question)

iface = gr.Interface(fn=func, inputs="text", outputs="text")
iface.launch()
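
# Once running, the endpoint can also be exercised programmatically, e.g. with
# gradio_client (a sketch; assumes the default local address and the default
# "/predict" route that a single gr.Interface exposes):
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# print(client.predict("What is the capital of France?", api_name="/predict"))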