Spaces:

TharunSivamani
/

phi-2-oasst1

Paused

App Files Files Community

TharunSivamani committed on Jan 20, 2024

Commit

61d0f76

verified ·

1 Parent(s): 01eb8d4

modified code

Browse files

Files changed (1) hide show

app.py +37 -27

app.py CHANGED Viewed

@@ -1,42 +1,52 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging
 import gradio as gr
 model_name = "microsoft/phi-2"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True
 )
 model.config.use_cache = False
-adapter_path = 'checkpoint-500'
-model.load_adapter(adapter_path)
-tokenizer = AutoTokenizer.from_pretrained("checkpoint-500", trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
-def generate_context(prompt, tokens=300):
-    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=tokens)
-    sentence = "[INST] " + prompt + " [/INST]"
-    result = pipe(sentence)
-    text = result[0]['generated_text']
-    return text[len(sentence):]
-examples = [
-    ["What is a large language model?", 250],
-    ["Explain the process of photosynthesis", 350]
-]
 demo = gr.Interface(
-    fn=generate_context,
-    inputs=[
-        gr.Textbox(label="How may I help you ? 🤖"),
-        gr.Slider(200, 500, value=300, label="Sentence length", step=50)
-    ],
-    outputs="text",
-    examples=examples
 )
-demo.launch(debug=True)

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig
+# Disabled alternative: load "checkpoint_500" directly as the full model instead
+# of loading the base model and adding the adapter to it (as done below).
+#model = AutoModelForCausalLM.from_pretrained("checkpoint_500",trust_remote_code=True)
+# Hub id of the base model that from_pretrained() loads below.
 model_name = "microsoft/phi-2"
+import os
+# Access token read from the environment; os.environ.get() yields None when
+# HUGGING_FACE_TOKEN is not set.
+token = os.environ.get("HUGGING_FACE_TOKEN")
+# 4-bit NF4 quantization settings. Currently disabled: the matching
+# quantization_config argument below is commented out as well, so the model is
+# loaded without any quantization config.
+#bnb_config = BitsAndBytesConfig(
+#    load_in_4bit=True,
+#    bnb_4bit_quant_type="nf4",
+#    bnb_4bit_compute_dtype=torch.float16,
+#)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    #quantization_config=bnb_config,
+    # NOTE(review): newer transformers releases deprecate `use_auth_token` in
+    # favour of `token` -- confirm against the version installed in this Space.
+    use_auth_token=token,
     trust_remote_code=True
 )
+# NOTE(review): use_cache=False is usually a training-time setting; presumably
+# carried over from fine-tuning -- confirm whether inference should keep it.
 model.config.use_cache = False
+# Load the adapter stored under "checkpoint_500" into the base model
+# (presumably the fine-tuned weights from training -- verify the path exists).
+model.load_adapter("checkpoint_500")
+# The tokenizer is read from the same "checkpoint_500" location as the adapter.
+tokenizer = AutoTokenizer.from_pretrained("checkpoint_500", trust_remote_code=True)
+# Use the end-of-sequence token as the padding token.
 tokenizer.pad_token = tokenizer.eos_token
+# Text-generation pipeline, created on the first request and reused afterwards
+# so that it is not rebuilt for every call.
+_text_generator = None
+def inference(prompt, count):
+    """Generate text that continues ``prompt`` using the loaded model.
+
+    Args:
+        prompt: Text to continue; converted with ``str()`` before generation.
+        count: Maximum number of new tokens to generate. Any value accepted by
+            ``int()`` works, including numeric strings such as ``"50"``.
+
+    Returns:
+        The ``generated_text`` field of the first pipeline result.
+
+    Raises:
+        ValueError: If ``count`` cannot be converted to an integer or is
+            smaller than 1.
+    """
+    global _text_generator
+    # Validate before touching the model so bad input fails fast with a
+    # message that names the actual problem.
+    try:
+        max_new_tokens = int(count)
+    except (TypeError, ValueError):
+        raise ValueError(
+            f"count must be a whole number of tokens, got {count!r}"
+        ) from None
+    if max_new_tokens < 1:
+        raise ValueError(f"count must be at least 1, got {max_new_tokens}")
+    # `model` and `tokenizer` are module-level objects that never change, so
+    # one pipeline instance can serve every request.
+    if _text_generator is None:
+        _text_generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
+    result = _text_generator(str(prompt), max_new_tokens=max_new_tokens)
+    return result[0]['generated_text']
+# Static text passed to gr.Interface as the page title and description.
+title = "TSAI S21 Assignment: Adaptive QLoRA training on open assist oasst1 dataset, using microsoft/phi2 model"
+description = "A simple Gradio interface that accepts a context and generates GPT like text "
+# One example row in input order: [prompt, count]. The count is a string
+# because the second input component is a Textbox.
+examples = [["What is a large language model?","50"]
+           ]
 demo = gr.Interface(
+    inference,
+    # The second field is handed to `inference` as `count`, which forwards it
+    # to the pipeline as max_new_tokens -- so the placeholder asks for tokens,
+    # not characters.
+    inputs = [gr.Textbox(placeholder="Enter a prompt"), gr.Textbox(placeholder="Enter number of tokens you want to generate")],
+    outputs = [gr.Textbox(label="Chat GPT like text")],
+    title = title,
+    description = description,
+    examples = examples
 )
+# Start the Gradio app with default launch settings.
+demo.launch()