Spaces:

abetlen
/

nanollava-gguf

Running on Zero

abetlen committed on May 12, 2024

Commit

4b12865

1 Parent(s): 226141b

Update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,21 +7,21 @@ import spaces
 from llama_cpp import Llama
 from llama_cpp.llama_chat_format import NanoLlavaChatHandler
-@spaces.GPU(duration=10)
 def answer_question(img, prompt):
-    chat_handler = NanoLlavaChatHandler.from_pretrained(
-        repo_id="abetlen/nanollava-gguf",
-        filename="*mmproj*",
-    )
-    llm = Llama.from_pretrained(
-        repo_id="abetlen/nanollava-gguf",
-        filename="*text-model*",
-        chat_handler=chat_handler,
-        n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
-        n_gpu_layers=-1,
-    )
     img_bytes = BytesIO()
     img.save(img_bytes, format='JPEG')

 from llama_cpp import Llama
 from llama_cpp.llama_chat_format import NanoLlavaChatHandler
+chat_handler = NanoLlavaChatHandler.from_pretrained(
+    repo_id="abetlen/nanollava-gguf",
+    filename="*mmproj*",
+)
+llm = Llama.from_pretrained(
+    repo_id="abetlen/nanollava-gguf",
+    filename="*text-model*",
+    chat_handler=chat_handler,
+    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
+    n_gpu_layers=-1,
+    flash_attn=True,
+)
+@spaces.GPU(duration=30)
 def answer_question(img, prompt):
     img_bytes = BytesIO()
     img.save(img_bytes, format='JPEG')