Spaces:

bkoz
/

bk-sandbox

Running on Zero

App Files Community

bkoz committed on May 18, 2024

Commit

eaa6aa4

1 Parent(s): b5d0ba6

updated

Browse files

Files changed (1) hide show

app.py +11 -69

app.py CHANGED Viewed

@@ -1,73 +1,15 @@
-import gradio as gr
 import spaces
-# import torch
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama, LlamaGrammar
-# zero = torch.Tensor([0]).cuda()
-# print(f'zero.device: {zero.device}') # <-- 'cpu' 🤔
 @spaces.GPU
-def greet(n):
-    global llm
-    llm = load_model(download_model())
-    # print(f'zero.device: {zero.device}') # <-- 'cuda:0' 🤗
-    grammar = LlamaGrammar.from_string('''
-    root ::= sentence
-    answer ::= (weather | complaint | yesno | gen)
-    weather ::= ("Sunny." | "Cloudy." | "Rainy.")
-    complaint ::= "I don't like talking about the weather."
-    yesno ::= ("Yes." | "No.")
-    gen ::= "1. " [A-Z] [a-z] [a-z]*
-    sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
-    ''')
-    prompts = [
-        "How's the weather in London?",
-        "How's the weather in Munich?",
-        "How's the weather in Barcelona?",
-    ]
-    print(f'Making a big inference...... {prompts[0]}')
-    output = llm(
-            prompts[0],
-            max_tokens=512,
-            temperature=0.4,
-            grammar=grammar
-    )
-    print(f'Returned..... {output}')
-    s = output['choices'][0]['text']
-    print(f'{s} , len(s) = {len(s)}')
-    print(output['choices'])
-    print(output['choices'][0]['text'])
-    print()
-    return f"Hello {s} Tensor"
-def download_model():
-    REPO_ID = "TheBloke/Llama-2-7B-GGUF"
-    FILENAME = "llama-2-7b.Q5_K_S.gguf"
-    print(f'Downloading model {REPO_ID}/{FILENAME}')
-    m = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
-    print(f'status: {m}')
-    return m
-def load_model(fp):
-    from llama_cpp import Llama, LlamaGrammar
-    print(f'Loading model: {fp}')
-    model_file=fp
-    llm = Llama(
-        model_path=model_file,
-        n_gpu_layers=-1, verbose=True
-    )
-    return llm
-demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-demo.launch(share=False)

 import spaces
+from diffusers import DiffusionPipeline
+pipe = DiffusionPipeline.from_pretrained(...)
+pipe.to('cuda')
 @spaces.GPU
+def generate(prompt):
+    return pipe(prompt).images
+gr.Interface(
+    fn=generate,
+    inputs=gr.Text(),
+    outputs=gr.Gallery(),
+).launch()