Spaces:

archit11
/

Hindi_LLM_arena

Running on Zero

App Files Files Community

archit11 committed on Jul 26

Commit

fb8f6e3

•

1 Parent(s): 22ff5cb

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -2

app.py CHANGED Viewed

@@ -65,22 +65,24 @@ def log_comparison(model1_name: str, model2_name: str, question: str, answer1: s
     except requests.RequestException as e:
         print(f"Error sending log to server: {e}")
-# Function to prepare input
 def prepare_input(model_id: str, message: str, chat_history: List[Tuple[str, str]]):
     tokenizer = tokenizers[model_id]
     try:
         inputs = tokenizer(
             [x[1] for x in chat_history] + [message],
             return_tensors="pt",
             truncation=True,
             padding=True,
             max_length=MAX_INPUT_TOKEN_LENGTH,
         )
     except Exception as e:
         print(f"Error preparing input for model {model_id}: {e}")
-        inputs = tokenizer([message], return_tensors="pt", padding=True, max_length=MAX_INPUT_TOKEN_LENGTH)
     return inputs
 # Function to generate responses from models
 @spaces.GPU(duration=120)
 def generate(
@@ -96,16 +98,26 @@ def generate(
     inputs = prepare_input(model_id, message, chat_history)
     input_ids = inputs.input_ids
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     try:
         streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
         generate_kwargs = dict(
             input_ids=input_ids,
             streamer=streamer,
             max_new_tokens=max_new_tokens,
             do_sample=True,
@@ -125,6 +137,7 @@ def generate(
         print(f"Error generating response from model {model_id}: {e}")
         yield "Error generating response."
 # Function to compare two models
 def compare_models(
     model1_name: str,

     except requests.RequestException as e:
         print(f"Error sending log to server: {e}")
 def prepare_input(model_id: str, message: str, chat_history: List[Tuple[str, str]]):
     tokenizer = tokenizers[model_id]
     try:
+        # Prepare inputs for the model
         inputs = tokenizer(
             [x[1] for x in chat_history] + [message],
             return_tensors="pt",
             truncation=True,
             padding=True,
             max_length=MAX_INPUT_TOKEN_LENGTH,
+            return_attention_mask=True  # Include the attention_mask
         )
     except Exception as e:
         print(f"Error preparing input for model {model_id}: {e}")
+        inputs = tokenizer([message], return_tensors="pt", padding=True, max_length=MAX_INPUT_TOKEN_LENGTH, return_attention_mask=True)
     return inputs
 # Function to generate responses from models
 @spaces.GPU(duration=120)
 def generate(
     inputs = prepare_input(model_id, message, chat_history)
     input_ids = inputs.input_ids
+    attention_mask = inputs.attention_mask  # Get attention_mask
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    # Ensure batch size is 1
+    if input_ids.shape[0] != 1:
+        input_ids = input_ids[:1]
+        attention_mask = attention_mask[:1]
     input_ids = input_ids.to(model.device)
+    attention_mask = attention_mask.to(model.device)  # Move to the same device as input_ids
     try:
         streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
         generate_kwargs = dict(
             input_ids=input_ids,
+            attention_mask=attention_mask,  # Pass the attention_mask
             streamer=streamer,
             max_new_tokens=max_new_tokens,
             do_sample=True,
         print(f"Error generating response from model {model_id}: {e}")
         yield "Error generating response."
 # Function to compare two models
 def compare_models(
     model1_name: str,