Spaces:
Running
on
T4
Running
on
T4
File size: 2,236 Bytes
9e275b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
"""
Taken from ESPNet
Adapted by Flux
"""
import torch
from Architectures.GeneralLayers.DurationPredictor import DurationPredictorLoss
from Utility.utils import make_non_pad_mask
class StochasticToucanTTSLoss(torch.nn.Module):
    """Length-normalised L1 reconstruction loss between predicted and gold spectrograms.

    Every utterance in the batch contributes equally to the result, no matter
    how many frames it has: the element-wise L1 error is averaged over the
    valid (non-padded) frames and the feature dimension of each utterance, and
    those per-utterance averages are then averaged over the batch.
    """

    def __init__(self):
        super().__init__()
        # reduction="none" keeps the element-wise errors so that forward() can
        # apply its own padding-aware weighting before summing.
        self.l1_criterion = torch.nn.L1Loss(reduction="none")
        # NOTE: forward() does not use the two criteria below; they are kept
        # so that the attributes of the instance stay the same.
        self.duration_criterion = DurationPredictorLoss(reduction="none")
        self.mse_criterion = torch.nn.MSELoss(reduction="none")

    def forward(self, after_outs, before_outs, gold_spectrograms, spectrogram_lengths, text_lengths):
        """Compute the masked, length-normalised L1 spectrogram loss.

        Args:
            after_outs (Tensor): Batch of outputs after postnets (B, Lmax, odim).
                May be None, in which case only ``before_outs`` is scored.
            before_outs (Tensor): Batch of outputs before postnets (B, Lmax, odim).
            gold_spectrograms (Tensor): Batch of target features (B, Lmax, odim).
            spectrogram_lengths (LongTensor): Batch of the lengths of each target (B,).
            text_lengths (LongTensor): Batch of the lengths of each input (B,).
                Accepted for interface compatibility; not used by this loss.

        Returns:
            Tensor: Scalar L1 loss value. When ``after_outs`` is given, this is
            the sum of the losses of the pre-postnet and post-postnet outputs.
        """
        # element-wise L1 error, still shaped like the spectrograms
        l1_loss = self.l1_criterion(before_outs, gold_spectrograms)
        if after_outs is not None:
            l1_loss = l1_loss + self.l1_criterion(after_outs, gold_spectrograms)

        # Mask that marks the real frames of each utterance, with a trailing
        # singleton axis so it broadcasts over the feature dimension.
        # (presumably make_non_pad_mask yields a (B, max(lengths)) boolean mask
        # that is True on non-padded positions -- confirm against Utility.utils)
        out_masks = make_non_pad_mask(spectrogram_lengths).unsqueeze(-1).to(gold_spectrograms.device)
        # The mask is only as long as the longest length in the batch, which
        # may differ from the time axis of the gold spectrograms, so it is
        # padded with False along time to match gold_spectrograms.size(1).
        out_masks = torch.nn.functional.pad(
            out_masks.transpose(1, 2),
            [0, gold_spectrograms.size(1) - out_masks.size(1), 0, 0, 0, 0],
            value=False,
        ).transpose(1, 2)

        # 1 / (number of valid frames) per utterance, then 1 / (B * odim), so
        # that the weighted sum below is a mean over frames, features and batch.
        out_weights = out_masks.float() / out_masks.sum(dim=1, keepdim=True).float()
        out_weights /= gold_spectrograms.size(0) * gold_spectrograms.size(2)

        # Weight the errors, drop everything that belongs to padding, and sum.
        return l1_loss.mul(out_weights).masked_select(out_masks).sum()
|