"""Minimal Gradio Spaces demo: greets the user and lazily loads a Llama-3 model on GPU."""

import subprocess

import gradio as gr
import spaces

# Pin transformers at runtime (the Spaces base image may ship another version).
# Bug fix: the original passed a list *and* shell=True — under shell=True the
# shell executes only the first element ("python3") and the pip install never
# ran. With shell=False (the default) the list is executed as intended.
result = subprocess.run(
    ["python3", "-m", "pip", "install", "transformers==4.34.0"],
    capture_output=True,
    text=True,
)
print(result.stdout)

# Module-level model handle, populated by load_model().
model = None


def greet(name):
    """Return a friendly greeting for *name*."""
    return "Hello " + name + "!!"


@spaces.GPU
def load_model():
    """Load the Llama-3-8B model and tokenizer onto the GPU.

    Imports are deferred to function scope because ``transformers`` is
    installed at runtime by the pip call above, and ``torch`` was never
    imported at module level in the original (which raised NameError here).
    """
    # Bug fix: without `global`, the original bound a *local* `model`,
    # leaving the module-level `model` permanently None.
    global model

    print("Loading model...")

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_path = "meta-llama/Meta-Llama-3-8B"
    # NOTE(review): tokenizer is loaded but not retained, matching the
    # original behavior — presumably needed later; confirm before removing.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )


def main():
    """Load the model, then launch the Gradio text-in/text-out interface."""
    load_model()
    demo = gr.Interface(fn=greet, inputs="text", outputs="text")
    demo.launch()


if __name__ == "__main__":
    main()