Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,55 +1,42 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"
-
-# Check whether a GPU is available
+model_name = "Qwen/Qwen2.5-Math-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load model and tokenizer
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16  # reduces memory usage
-    ).to(device).eval()
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    print("Model and tokenizer loaded.")
-except Exception as e:
-    print(f"Error loading the model: {e}")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    torch_dtype=torch.float16,
+    trust_remote_code=True
+).eval()
 
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+# Prepare the input
 chat = [
     {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
     {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
 ]
 
+# Apply the chat template
+conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)
+
+input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
+
+# Build the attention mask
+attention_mask = (input_ids != tokenizer.pad_token_id).long()
 
 # Run inference
 try:
     with torch.no_grad():
         outputs = model.generate(
-            input_ids=
+            input_ids=input_ids,
+            max_new_tokens=100,  # adjust as needed
+            attention_mask=attention_mask
         )
-    print("
+    print("Generated answer:", tokenizer.decode(outputs[0], skip_special_tokens=True))
 except Exception as e:
     print(f"Error during inference: {e}")
-
-# Display the output
-try:
-    print("Output: ", tokenizer.decode(outputs[0], skip_special_tokens=True))
-except Exception as e:
-    print(f"Error displaying the output: {e}")
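For reference, a minimal sketch of the generation flow the updated app.py implements, using only the standard transformers chat-template API. The max_new_tokens budget and the prompt-stripping before decoding are illustrative choices, not part of the commit; likewise, add_generation_prompt=True (rather than the False used above) is the usual setting when you want the model to produce an assistant reply, since it ends the prompt with the assistant header.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-Math-7B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

chat = [
    {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
    {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
]

# add_generation_prompt=True appends the assistant header, so the model
# continues with its own answer instead of another user turn.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)  # also returns the attention_mask

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=512)  # illustrative budget

# Decode only the newly generated tokens, not the echoed prompt.
answer = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
print(answer)

Two details of the committed version worth noting: with a single, unpadded prompt the hand-built attention mask is all ones anyway (the tokenizer call above returns the same mask directly), and max_new_tokens=100 is a tight budget for a step-by-step \boxed{} answer, which can truncate the output.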