Update modeling_minicpmo.py

#6
Files changed (1):
  modeling_minicpmo.py (+4 -3)
modeling_minicpmo.py CHANGED
@@ -392,7 +392,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
                         [torch.arange(r[0], r[1], dtype=torch.long) for r in cur_image_bound]
                     ).to(vllm_embedding.device)
 
-                    cur_vllm_emb.scatter_(
+                    vllm_embedding[i] = vllm_embedding[i].scatter(
                         0,
                         image_indices.view(-1, 1).repeat(1, cur_vllm_emb.shape[-1]),
                         cur_vs_hs.view(-1, cur_vs_hs.shape[-1]),
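This hunk swaps the in-place `Tensor.scatter_` for the out-of-place `Tensor.scatter`, assigning the result back into `vllm_embedding[i]`. A plausible motivation (assumed; the PR carries no description) is that mutating `cur_vllm_emb` in place can fail, or silently not propagate, when the embedding tensor is tracked by autograd or wrapped by accelerate's dispatch hooks, whereas the out-of-place form always yields a fresh tensor. A minimal sketch of the two call forms, with made-up shapes:

```python
import torch

emb = torch.zeros(6, 4)         # stands in for vllm_embedding[i]
indices = torch.tensor([1, 2])  # stands in for image_indices
src = torch.ones(2, 4)          # stands in for cur_vs_hs

# Old form: scatter_ mutates emb in place.
emb.scatter_(0, indices.view(-1, 1).repeat(1, emb.shape[-1]), src)

# New form: scatter returns a new tensor that is assigned back.
emb2 = torch.zeros(6, 4)
emb2 = emb2.scatter(0, indices.view(-1, 1).repeat(1, emb2.shape[-1]), src)

assert torch.equal(emb, emb2)  # same values, different update semantics
```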
@@ -595,7 +595,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
         elif self.training:
             for i in range(bs):
                 # dummy audio_embeddings
-                input_embeddings += audio_embeddings[0].mean() * 0
+                input_embeddings = input_embeddings + audio_embeddings[0].mean() * 0
 
         return input_embeddings
 
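Same theme here: `+=` on a tensor is an in-place add, and autograd raises a RuntimeError at backward time if an in-place op modified a tensor whose saved value the backward pass still needs. Rewriting it as `x = x + ...` binds the name to a new tensor and keeps the graph valid, while the `* 0` term still pulls `audio_embeddings` into the graph as a gradient-flow dummy, matching the existing comment. A minimal sketch of the failure mode (hypothetical values, not from the file):

```python
import torch

x = torch.ones(3, requires_grad=True)
y = torch.exp(x)  # exp saves its output for use in the backward pass

# y += 1          # in-place: backward would raise a RuntimeError because
#                 # the saved output was modified after the fact
y = y + 1         # out-of-place: y now names a new tensor; graph stays valid

y.sum().backward()
```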
@@ -751,7 +751,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
         input_ids=None,
         pixel_values=None,
         tgt_sizes=None,
-        audio_features=None,
+        audio_features=[],
         audio_feature_lens=None,
         image_bound=None,
         audio_bounds=None,
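Changing the `audio_features` default from `None` to `[]` lets downstream code take the length of, or iterate over, the argument without a `None` guard; presumably (the PR does not say) some call path was hitting `TypeError: object of type 'NoneType' has no len()`. A hypothetical reduction of the pattern; `process_audio` is an illustrative name, not the method from the file:

```python
def process_audio(audio_features=None):
    if len(audio_features) > 0:  # TypeError when the caller relies on the default
        ...

def process_audio_fixed(audio_features=[]):
    if len(audio_features) > 0:  # len([]) == 0, so the no-audio path just skips
        ...
```

One caveat worth knowing: a mutable `[]` default is shared across calls, which is safe only as long as the function never mutates it.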
@@ -2655,6 +2655,7 @@ class ConditionalChatTTS(PreTrainedModel):
     """
 
     config_class = ConditionalChatTTSConfig
+    _no_split_modules = []
 
     def __init__(self, config: ConditionalChatTTSConfig):
         super().__init__(config)
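`transformers.PreTrainedModel` ships with `_no_split_modules = None`, and loading a model with a `device_map` (e.g. `device_map="auto"`) raises a ValueError while the attribute is still `None`, because accelerate cannot tell which submodules must stay together on one device. Declaring an empty list opts the class into device-map inference and allows any submodule to be split. A self-contained sketch with a dummy config (class names are illustrative):

```python
from transformers import PretrainedConfig, PreTrainedModel

class TinyConfig(PretrainedConfig):
    model_type = "tiny"

class TinyTTS(PreTrainedModel):
    config_class = TinyConfig
    # With the inherited default (_no_split_modules = None), loading via
    # device_map="auto" refuses to infer a placement for this class.
    # An empty list permits every submodule to land on a different device.
    _no_split_modules = []
```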
 