kjn1009 committed on
Commit 6f95380 • 1 Parent(s): 46772b0

Update app.py

Files changed (1)
  1. app.py +10 -27
app.py CHANGED
@@ -1,30 +1,13 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch
-
- # Prepare the input as before
- chat = [
-     {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
-     {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
- ]
-
- # 1: Load the model and tokenizer
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
-
- # 2: Apply the chat template
- formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
- print("Formatted chat:\n", formatted_chat)
-
- # 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
- inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
- # Move the tokenized inputs to the same device the model is on (GPU/CPU)
- inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
- print("Tokenized inputs:\n", inputs)
-
- # 4: Generate text from the model
- outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.)
- print("Generated tokens:\n", outputs)
-
- # 5: Decode the output back to a string
- decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
- print("Decoded output:\n", decoded_output)
+ from transformers import AutoTokenizer
+ from transformers import AutoModelForCausalLM
+
+ model = AutoModelForCausalLM.from_pretrained("yanolja/EEVE-Korean-Instruct-10.8B-v1.0")
+ tokenizer = AutoTokenizer.from_pretrained("yanolja/EEVE-Korean-Instruct-10.8B-v1.0")
+
+ prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
+ text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'
+ model_inputs = tokenizer(prompt_template.format(prompt=text), return_tensors='pt')
+
+ outputs = model.generate(**model_inputs, max_new_tokens=256)
+ output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+ print(output_text)
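
A minimal sketch of how the updated script could keep the device_map="auto" / bfloat16 placement and continuation-only decoding from the removed Llama-3 version, assuming a GPU-backed runtime; the Korean prompt asks "What is the capital of Korea? Pick one of the choices below: (A) Gyeongseong (B) Busan (C) Pyongyang (D) Seoul (E) Jeonju".

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yanolja/EEVE-Korean-Instruct-10.8B-v1.0"
# Mirror the GPU/dtype handling used in the earlier version of app.py
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt_template = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "Human: {prompt}\nAssistant:\n"
)
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

# Tokenize the prompt and move it to the same device the model is on (GPU/CPU)
inputs = tokenizer(prompt_template.format(prompt=text), return_tensors="pt").to(model.device)

# Generate, then decode only the tokens produced after the prompt
outputs = model.generate(**inputs, max_new_tokens=256)
answer = tokenizer.decode(outputs[0][inputs["input_ids"].size(1):], skip_special_tokens=True)
print(answer)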