llm-demo1-finalfinal

Runtime error

App Files Community

storresbusquets committed on Sep 17, 2023

Commit

48d5a82

1 Parent(s): e953355

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -23

app.py CHANGED Viewed

@@ -1,50 +1,67 @@
 import gradio as gr
 import torch
 import transformers
-# from transformers import AutoTokenizer
-from langchain import LLMChain, HuggingFacePipeline, PromptTemplate
-import os
-from ctransformers import AutoModelForCausalLM, AutoTokenizer
 # model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
 # tokenizer = AutoTokenizer.from_pretrained(model)
-access_token = os.getenv("Llama2")
 def greet(text):
-    model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
-    tokenizer = AutoTokenizer.from_pretrained(model)
     # model = "meta-llama/Llama-2-7b-hf"
     # tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
-    pipeline = transformers.pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
     # torch_dtype=torch.bfloat16,
     # trust_remote_code=True,
     # device_map="auto",
-    max_length=512,
-    max_new_tokens=256,
-    do_sample=True,
     # top_k=10,
-    num_return_sequences=1,
-    eos_token_id=tokenizer.eos_token_id,
     # token=access_token
     )
-    llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})
-    template = """Write a concise summary of the following:
-                "{text}"
-                CONCISE SUMMARY:"""
-    prompt = PromptTemplate(template=template, input_variables=["text"])
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-    return llm_chain.run(text)
 with gr.Blocks() as demo:

 import gradio as gr
 import torch
 import transformers
+from langchain.llms import CTransformers
+from langchain import PromptTemplate, LLMChain
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 # model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
 # tokenizer = AutoTokenizer.from_pretrained(model)
+# access_token = os.getenv("Llama2")
 def greet(text):
+    """Return a concise summary of ``text`` produced by a Llama-2 chat model.
+
+    Creates a ``CTransformers`` LLM from the ``TheBloke/Llama-2-7B-Chat-GGML``
+    repository (``llama-2-7b-chat.ggmlv3.q2_K.bin`` weights) with a
+    ``StreamingStdOutCallbackHandler`` attached, wraps it in an ``LLMChain``
+    whose prompt uses the ``[INST] <<SYS>> ... <</SYS>> ... [/INST]`` layout,
+    and returns the result of running the chain on ``text``.
+
+    NOTE(review): the model is constructed on every call; consider building
+    it once at module level if start-up cost matters.
+    """
+    llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q2_K.bin', callbacks=[StreamingStdOutCallbackHandler()])
+    template = """
+    [INST] <<SYS>>
+    You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
+    <</SYS>>
+    {text}[/INST]
+    """
+    prompt = PromptTemplate(template=template, input_variables=["text"])
+    llm_chain = LLMChain(prompt=prompt, llm=llm)
+    summary = llm_chain.run(text)
+    return summary
+    # model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
+    # tokenizer = AutoTokenizer.from_pretrained(model)
     # model = "meta-llama/Llama-2-7b-hf"
     # tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
+    # pipeline = transformers.pipeline(
+    # "text-generation",
+    # model=model,
+    # tokenizer=tokenizer,
     # torch_dtype=torch.bfloat16,
     # trust_remote_code=True,
     # device_map="auto",
+    # max_length=512,
+    # max_new_tokens=256,
+    # do_sample=True,
     # top_k=10,
+    # num_return_sequences=1,
+    # eos_token_id=tokenizer.eos_token_id,
     # token=access_token
+    # )  # closing paren of the commented-out pipeline(...) call; left live it is an unmatched ')' SyntaxError
+    # llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})
+    # template = """Write a concise summary of the following:
+                # "{text}"
+                # CONCISE SUMMARY:"""
+    # prompt = PromptTemplate(template=template, input_variables=["text"])
+    # llm_chain = LLMChain(prompt=prompt, llm=llm)
+    # return llm_chain.run(text)
 with gr.Blocks() as demo: