dsfdfsghgf committed
Commit 9313118 · verified · 1 Parent(s): ce5f258

Update app.py

Files changed (1)
  1. app.py +22 -35
app.py CHANGED
@@ -1,55 +1,42 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Model name for the smaller variant
-model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"
-
-# Check whether a GPU is available
+model_name = "Qwen/Qwen2.5-Math-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load model and tokenizer
-try:
-    print("Loading model and tokenizer...")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="auto",  # spread the model across the available devices
-        low_cpu_mem_usage=True,  # tries to reduce memory usage
-        trust_remote_code=True,
-        torch_dtype=torch.float16  # reduces memory usage
-    ).to(device).eval()
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    print("Model and tokenizer loaded.")
-except Exception as e:
-    print(f"Error while loading the model: {e}")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    torch_dtype=torch.float16,
+    trust_remote_code=True
+).eval()
 
-# Build the input for the chat model
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+# Build the chat input
 chat = [
     {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
     {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
 ]
 
-# Prepare the input format
-try:
-    conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)
-    inputs = tokenizer(conversation_str, return_tensors="pt", padding=True, truncation=True)
-    print("Input prepared.")
-except Exception as e:
-    print(f"Error while preparing the input: {e}")
+# Prepare the model input
+conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)
+
+input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
+
+# Build the attention mask
+attention_mask = (input_ids != tokenizer.pad_token_id).long()
 
 # Run inference
 try:
     with torch.no_grad():
         outputs = model.generate(
-            input_ids=inputs["input_ids"].to(device),
-            max_length=50  # try a smaller length
+            input_ids=input_ids,
+            max_new_tokens=100,  # adjust as needed
+            attention_mask=attention_mask
         )
-    print("Inference finished.")
+    print("Generated answer:", tokenizer.decode(outputs[0], skip_special_tokens=True))
 except Exception as e:
     print(f"Error during inference: {e}")
-
-# Display the output
-try:
-    print("Output: ", tokenizer.decode(outputs[0], skip_special_tokens=True))
-except Exception as e:
-    print(f"Error while printing the output: {e}")