"""Gradio demo that summarizes text with a GGML GPT-2 model through LangChain."""

import functools
import os

import gradio as gr
import torch
import transformers
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from langchain import HuggingFacePipeline, LLMChain, PromptTemplate

# Loaded once at import time and shared by every request.
model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
tokenizer = AutoTokenizer.from_pretrained(model)

# NOTE(review): presumably a Hugging Face token for a gated checkpoint such as
# "meta-llama/Llama-2-7b-hf" (an earlier revision loaded that model) -- confirm
# whether it is still required for the public GGML GPT-2 checkpoint.
access_token = os.getenv("Llama2")

_SUMMARY_TEMPLATE = """Write a concise summary of the following: "{text}" CONCISE SUMMARY:"""


@functools.lru_cache(maxsize=1)
def _summary_chain():
    """Build the summarization chain on first use and reuse it afterwards.

    Constructing the transformers pipeline and the LangChain wrapper is
    independent of the request, so doing it once avoids reloading the model
    on every button click.
    """
    generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        # max_length bounds prompt tokens plus generated tokens together.
        max_length=512,
        do_sample=False,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        token=access_token,
    )
    llm = HuggingFacePipeline(pipeline=generator, model_kwargs={"temperature": 0})
    prompt = PromptTemplate(template=_SUMMARY_TEMPLATE, input_variables=["text"])
    return LLMChain(prompt=prompt, llm=llm)


def greet(text):
    """Return a concise summary of ``text`` produced by the language model.

    Args:
        text: The passage to summarize, as entered in the "Text" box.

    Returns:
        The chain's output string, or an empty string when ``text`` is empty
        or only whitespace (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""
    return _summary_chain().run(text)


with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    clear = gr.ClearButton([text, summary])
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")

demo.launch()