karths committed on
Commit
dd28b0b
·
verified ·
1 Parent(s): 34063b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -10,6 +10,11 @@ import logging
10
  import spaces
11
  from threading import Thread
12
  from collections.abc import Iterator
 
 
 
 
 
13
 
14
  # Setup logging
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
@@ -101,15 +106,16 @@ def llama_generate(
101
  repetition_penalty: float = 1.2,
102
  ) -> Iterator[str]:
103
 
104
- input_ids = llama_tokenizer.encode(message, return_tensors="pt").to(llama_model.device)
 
105
 
106
- if input_ids.shape[1] > LLAMA_MAX_INPUT_TOKEN_LENGTH:
107
- input_ids = input_ids[:, -LLAMA_MAX_INPUT_TOKEN_LENGTH:]
108
  gr.Warning(f"Trimmed input from conversation as it was longer than {LLAMA_MAX_INPUT_TOKEN_LENGTH} tokens.")
109
 
110
  streamer = TextIteratorStreamer(llama_tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
111
  generate_kwargs = dict(
112
- {"input_ids": input_ids},
113
  streamer=streamer,
114
  max_new_tokens=max_new_tokens,
115
  do_sample=True,
 
10
  import spaces
11
  from threading import Thread
12
  from collections.abc import Iterator
13
+ import csv
14
+
15
+ # Increase CSV field size limit
16
+ csv.field_size_limit(1000000) # Or an even larger value if needed
17
+
18
 
19
  # Setup logging
20
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
 
106
  repetition_penalty: float = 1.2,
107
  ) -> Iterator[str]:
108
 
109
+ inputs = llama_tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=LLAMA_MAX_INPUT_TOKEN_LENGTH).to(llama_model.device)
110
+ #The line above was changed to add attention mask
111
 
112
+ if inputs.input_ids.shape[1] > LLAMA_MAX_INPUT_TOKEN_LENGTH:
113
+ inputs.input_ids = inputs.input_ids[:, -LLAMA_MAX_INPUT_TOKEN_LENGTH:]
114
  gr.Warning(f"Trimmed input from conversation as it was longer than {LLAMA_MAX_INPUT_TOKEN_LENGTH} tokens.")
115
 
116
  streamer = TextIteratorStreamer(llama_tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
117
  generate_kwargs = dict(
118
+ inputs, # Pass the entire inputs dictionary
119
  streamer=streamer,
120
  max_new_tokens=max_new_tokens,
121
  do_sample=True,