# import gradio as gr # from langchain.llms import LlamaCpp # from langchain import PromptTemplate, LLMChain # from langchain.llms import GPT4All # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # # import requests # # url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin" # # response = requests.get(url) # # with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f: # # f.write(response.content) # print("DONE") # def func(user): # template = """ # Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?. # Question: {question} # Answer: """ # prompt = PromptTemplate(template=template, input_variables=["question"]) # local_path = ( # "./nous-hermes-13b.ggmlv3.q4_0.bin" # ) # # # Callbacks support token-wise streaming # # callbacks = [StreamingStdOutCallbackHandler()] # # Verbose is required to pass to the callback manager # llm = LlamaCpp(model_path="./nous-hermes-13b.ggmlv3.q4_0.bin", n_ctx=2048) # llm_chain = LLMChain(prompt=prompt, llm=llm) # question = user # llm_chain.run(question) # return llm_chain.run(question) # iface = gr.Interface(fn=func, inputs="text", outputs="text") # iface.launch() # import gradio as gr # from langchain.llms import LlamaCpp # from langchain import PromptTemplate, LLMChain # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # print("DONE") # def func(user): # template = """ # Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?. 
# Question: {question}
# Answer: """
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"
#     llm = LlamaCpp(model_path=local_path)
#     llm_chain = LLMChain(prompt=prompt, llm=llm, streaming=True)  # Enable streaming mode
#     question = user
#     llm_chain.run(question)
#     return llm_chain.run(question)
# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()

import gradio as gr
from gpt4allj import Model

# Location of the local model weights, resolved against the working directory.
MODEL_PATH = './ggml-gpt4all-j.bin'

# Load the local model once at import time so every request reuses the same
# instance instead of re-reading the weights per call.
model = Model(MODEL_PATH)


def generate_response(prompt: str) -> str:
    """Return the model's generated text for *prompt*.

    Args:
        prompt: Text typed by the user into the Gradio input box.

    Returns:
        Whatever ``model.generate`` produces for the prompt, or an empty
        string when the prompt is empty or contains only whitespace.
    """
    # A blank prompt carries nothing to respond to; skip the model call
    # rather than spend an inference pass on it.
    if not prompt or not prompt.strip():
        return ""
    return model.generate(prompt)


# Gradio interface with a single text input and a single text output.
# NOTE(review): the import and weights file name point at GPT4All-J, while the
# title/description say "GPT-4 AllJ"; the display strings are left unchanged.
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="GPT-4 AllJ",
    description="Generate responses using GPT-4 AllJ model.",
)

# Only start the web server when executed as a script, so importing this
# module (e.g. to reuse generate_response) does not block on launch().
if __name__ == "__main__":
    iface.launch()