yuzaa committed · Commit 4a25f99 · verified · 1 Parent(s): 1bbb766

fix vision-only inference

Files changed (1): modeling_minicpmo.py (+4 -3)
modeling_minicpmo.py CHANGED
@@ -484,9 +484,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
         Returns:
             List[List[torch.Tensor]]: audio embeddings
         """
-        dtype = self.apm.embed_positions.weight.dtype
-        device = self.apm.embed_positions.weight.device
-
+
         wavforms = data.get("audio_features", [])  # (bs, 80, frames) or [], multi audios need filled in advance
         audio_feature_lens_raw = data.get("audio_feature_lens", [])  # list, [[x1, x2], [y1], [z1]]

@@ -547,6 +545,9 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
             final_audio_embeds.append(target_audio_embeds)
             return final_audio_embeds
         elif self.training and dummy:
+            dtype = self.apm.embed_positions.weight.dtype
+            device = self.apm.embed_positions.weight.device
+
             dummy_wavs = torch.zeros((1, 80, 100), device=device, dtype=dtype)
             audio_states = self.apm(dummy_wavs, output_hidden_states=True).hidden_states[self.audio_encoder_layer]
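For context on why moving these two lines fixes vision-only inference: the dtype/device lookup dereferences self.apm (the audio encoder), which presumably is not instantiated when the model is loaded without its audio tower, so evaluating it unconditionally at the top of the method failed even for calls that carried no audio at all. After the commit, self.apm is only touched on the training dummy-audio path that actually needs it. A minimal sketch of the pattern follows; the ToyMiniCPMO class, the init_audio flag, and the simplified get_audio_embedding signature are illustrative assumptions, not the real model code.

import torch

class ToyMiniCPMO(torch.nn.Module):
    # Illustrative stand-in for MiniCPMO, not the real class. `apm` mimics
    # the audio encoder, which may never be created for a vision-only load.
    def __init__(self, init_audio: bool = True):
        super().__init__()
        if init_audio:
            self.apm = torch.nn.Embedding(1500, 80)  # stands in for the audio tower

    def get_audio_embedding(self, data: dict, dummy: bool = False):
        # Before commit 4a25f99, the dtype/device lookup below sat here,
        # unconditionally dereferencing self.apm and raising AttributeError
        # whenever the model was loaded without its audio encoder.
        wavforms = data.get("audio_features", [])
        if len(wavforms) > 0:
            raise NotImplementedError  # real audio path, omitted in this sketch
        elif self.training and dummy:
            # After the fix: self.apm is only touched on the path that
            # actually needs it (the dummy forward used during training).
            dtype = self.apm.weight.dtype
            device = self.apm.weight.device
            dummy_wavs = torch.zeros((1, 80, 100), device=device, dtype=dtype)
            return [dummy_wavs]
        return []

model = ToyMiniCPMO(init_audio=False).eval()
# Vision-only inference: no audio features, no self.apm, and no crash,
# because the guarded branch is never entered in eval mode.
print(model.get_audio_embedding({"pixel_values": torch.rand(1, 3, 448, 448)}))  # -> []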