DR-Rakshitha committed on
Commit 0010001
1 Parent(s): 251ca72

update app.py

Files changed (1)
  1. app.py +41 -53
app.py CHANGED
@@ -1,60 +1,48 @@
- import os
- import urllib.request
  import gradio as gr
- from llama_cpp import Llama
-
-
- def download_file(file_link, filename):
-     # Checks if the file already exists before downloading
-     if not os.path.isfile(filename):
-         urllib.request.urlretrieve(file_link, filename)
-         print("File downloaded successfully.")
-     else:
-         print("File already exists.")
-
-
- # Dowloading GGML model from HuggingFace
- ggml_model_path = "https://huggingface.co/TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-GGML/blob/main/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
- filename = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
-
- download_file(ggml_model_path, filename)
-
-
- llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
-
-
- def generate_text(prompt="Who is the CEO of Apple?"):
-     output = llm(
-         prompt,
-         max_tokens=256,
-         temperature=0.1,
-         top_p=0.5,
-         echo=False,
-         stop=["#"],
      )
-     output_text = output["choices"][0]["text"].strip()
-
-     # Remove Prompt Echo from Generated Text
-     cleaned_output_text = output_text.replace(prompt, "")
-     return cleaned_output_text
-
-
- description = "Vicuna-7B"
-
- examples = [
-     ["What is the capital of France?", "The capital of France is Paris."],
-     [
-         "Who wrote the novel 'Pride and Prejudice'?",
-         "The novel 'Pride and Prejudice' was written by Jane Austen.",
-     ],
-     ["What is the square root of 64?", "The square root of 64 is 8."],
- ]
-
- gradio_interface = gr.Interface(
      fn=generate_text,
-     inputs="text",
-     outputs="text",
-     examples=examples,
-     title="Vicuna-7B",
- )
- gradio_interface.launch()
 
 
 
  import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ from gpt4all import GPT4All
+
+ model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
+
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     "tiiuae/falcon-7b-instruct",
+ #     torch_dtype=torch.bfloat16,
+ #     trust_remote_code=True,
+ #     device_map="auto",
+ #     low_cpu_mem_usage=True,
+ # )
+ # tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+
+
+ def generate_text(input_text):
+     # input_ids = tokenizer.encode(input_text, return_tensors="pt")
+     # attention_mask = torch.ones(input_ids.shape)
+
+     output = model.generate(
+         input_text
+         # input_ids,
+         # attention_mask=attention_mask,
+         # max_length=200,
+         # do_sample=True,
+         # top_k=10,
+         # num_return_sequences=1,
+         # eos_token_id=tokenizer.eos_token_id,
      )
+
+     # output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     # print(output_text)
+
+     # Remove Prompt Echo from Generated Text
+     # cleaned_output_text = output_text.replace(input_text, "")
+     return output
+
+
+ text_generation_interface = gr.Interface(
      fn=generate_text,
+     inputs=[
+         gr.inputs.Textbox(label="Input Text"),
+     ],
+     outputs=gr.inputs.Textbox(label="Generated Text"),
+     title="Falcon-7B Instruct",
+ ).launch()
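
For reference, a minimal self-contained version of the new app could look like the sketch below. It assumes the gpt4all 1.x Python bindings, where GPT4All.generate() accepts max_tokens/temp keyword arguments and returns a plain string (so no ["choices"][0]["text"] unpacking is needed, unlike the removed llama_cpp call), that the GGML checkpoint file is already available locally, and Gradio's top-level gr.Textbox component in place of the deprecated gr.inputs namespace. The sampling values and the demo variable name are illustrative, not part of this commit.

import gradio as gr
from gpt4all import GPT4All

# Load the local GGML checkpoint; GPT4All.generate() returns a plain string.
model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")


def generate_text(input_text):
    # max_tokens caps the response length; temp=0.1 keeps sampling near-greedy,
    # roughly matching the temperature=0.1 used in the removed llama_cpp version.
    return model.generate(input_text, max_tokens=200, temp=0.1)


demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    title="Falcon-7B Instruct",
)
demo.launch()

With those assumptions, calling generate_text("What is the capital of France?") returns the generated text directly, so Gradio can display it in the output textbox without any prompt-echo cleanup.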