File size: 2,427 Bytes
be29a68
 
 
 
 
683cf67
f1f9df6
be29a68
1e2ba54
be29a68
1e2ba54
be29a68
1e2ba54
be29a68
 
1e2ba54
f1f9df6
be29a68
683cf67
be29a68
448c406
be29a68
 
 
448c406
be29a68
448c406
be29a68
a35163f
be29a68
67e542e
be29a68
a35163f
448c406
be29a68
 
a35163f
be29a68
a481b55
be29a68
 
 
a35163f
be29a68
a35163f
be29a68
 
666bc15
1111730
 
 
 
a35163f
1111730
a35163f
1111730
 
 
 
a35163f
1111730
a35163f
1111730
448c406
1111730
448c406
1111730
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import threading

import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# import requests

# url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"

# response = requests.get(url)

# with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
#     f.write(response.content)


# Printed once at module load, after the imports have run.
print("DONE")

# Lazily-built chain shared by every request, so the model file is loaded
# once per process instead of once per call.
_chain = None

# Serialises first-time construction and inference on the shared chain; a
# single LlamaCpp instance is not assumed to be safe for concurrent use.
_chain_lock = threading.Lock()


def _get_chain():
    """Return the shared LLMChain, building it on first use.

    Callers must hold ``_chain_lock`` while calling this and while using
    the returned chain.
    """
    global _chain
    if _chain is not None:
        return _chain

    # The prompt text is sent to the model verbatim, so it is kept exactly
    # as written (including its indentation).
    template = """
    Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
    Question: {question}
    
    Answer: """

    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm = LlamaCpp(model_path="./model.bin")
    _chain = LLMChain(prompt=prompt, llm=llm)
    return _chain


def func(user):
    """Answer one chat message using the local model at ``./model.bin``.

    Args:
        user: The text entered by the user; it is substituted for
            ``{question}`` in the prompt template.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    with _chain_lock:
        # Run the chain exactly once per request: every run is a full
        # model inference, and only one result can be returned.
        return _get_chain().run(user)

# Wrap `func` in a Gradio interface with one text input and one text output,
# then start serving it.
iface = gr.Interface(
    fn=func,
    inputs="text",
    outputs="text",
)
iface.launch()

# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# print("DONE")

# def func(user):
#     template = """
#     Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
#     Question: {question}
    
#     Answer: """
    
#     prompt = PromptTemplate(template=template, input_variables=["question"])
    
#     local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"
    
#     llm = LlamaCpp(model_path=local_path)
#     llm_chain = LLMChain(prompt=prompt, llm=llm, streaming=True)  # Enable streaming mode
#     question = user
#     llm_chain.run(question)

#     return llm_chain.run(question)

# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()