import gradio as gr
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


def func(question):
    template = """Question: {question}

Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])

    local_path = (
        "https://tommy24-llm.hf.space/file=nous-hermes-13b.ggmlv3.q4_0.bin"
        # Replace with the path to a locally downloaded model file:
        # GPT4All loads models from disk, not directly from a URL.
    )

    # Callbacks support token-wise streaming
    callbacks = [StreamingStdOutCallbackHandler()]

    # Verbose is required to pass to the callback manager
    llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)

    # If you want to use a custom model, add the backend parameter instead.
    # Check https://docs.gpt4all.io/gpt4all_python.html for supported backends
    # llm = GPT4All(model=local_path, backend="gptj", callbacks=callbacks, verbose=True)

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain.run(question)


iface = gr.Interface(fn=func, inputs="text", outputs="text")
iface.launch()
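# --- Illustrative client call (a sketch, not part of the original script) ---
# Once iface.launch() is serving, the interface can also be queried
# programmatically from a separate process. This assumes the gradio_client
# package is installed and that the default endpoint name "/predict" is
# exposed; the local URL below is the address launch() typically prints,
# not something taken from the original code.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   answer = client.predict("What is the capital of France?", api_name="/predict")
#   print(answer)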