llm

Sleeping

Chris4K committed on Jan 27

Commit

85f410a

verified ·

1 Parent(s): 33239be

Update services/strategy.py

Files changed (1) hide show

services/strategy.py CHANGED Viewed

@@ -54,6 +54,7 @@ class MajorityVotingStrategy(GenerationStrategy):
 class BestOfN(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         scored_outputs = []
         for _ in range(num_samples):
@@ -65,9 +66,14 @@ class BestOfN(GenerationStrategy):
             response = generator.tokenizer.decode(output[0], skip_special_tokens=True)
             # Tokenize the response for scoring with the PRM model
             response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
             prm_output = generator.prm_model(**response_inputs)  # Pass the inputs correctly to the model
-            score = prm_output.logits.mean().item()
             # Append the response and its score
             scored_outputs.append((response, score))
@@ -76,6 +82,7 @@ class BestOfN(GenerationStrategy):
         return max(scored_outputs, key=lambda x: x[1])[0]
 class BeamSearch(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
             input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)

 class BestOfN(GenerationStrategy):
+    @observe()
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         scored_outputs = []
         for _ in range(num_samples):
             response = generator.tokenizer.decode(output[0], skip_special_tokens=True)
             # Tokenize the response for scoring with the PRM model
+            #TODO use the real tokenizer from generator
             response_inputs = generator.tokenizer(response, return_tensors="pt").to(generator.device)
+            # Pass the response inputs correctly to the PRM model
             prm_output = generator.prm_model(**response_inputs)  # Pass the inputs correctly to the model
+            # Check the expected output structure for prm_model and use it accordingly
+            score = prm_output.logits.mean().item() if hasattr(prm_output, 'logits') else 0.0
             # Append the response and its score
             scored_outputs.append((response, score))
         return max(scored_outputs, key=lambda x: x[1])[0]
 class BeamSearch(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
             input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)