divakaivan committed on
Commit
e153529
·
verified ·
1 Parent(s): 3e188e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -137,10 +137,11 @@ def predict(text, speaker):
137
 
138
  ### ### ###
139
  example = dataset['test'][11]
140
- speaker_embedding = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
141
-
142
- speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
143
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embedding)
 
144
  with torch.no_grad():
145
  speech = vocoder(spectrogram)
146
  # speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)
 
137
 
138
  ### ### ###
139
  example = dataset['test'][11]
140
+ # speaker_embedding = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
141
+ speaker_embedding = speaker_embedding.expand(-1, inputs["input_ids"].size(1), -1)
142
+ spectrogram = model.generate_speech(inputs["input_ids"].to(device), speaker_embedding)
143
+ # speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
144
+ # spectrogram = model.generate_speech(inputs["input_ids"], speaker_embedding)
145
  with torch.no_grad():
146
  speech = vocoder(spectrogram)
147
  # speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)