import torch


def kl_divergence(alpha, num_classes, device=None):
    """KL divergence between Dir(alpha) and the uniform Dirichlet Dir(1, ..., 1).

    alpha: tensor of shape (batch, num_classes) holding Dirichlet parameters.
    Returns a tensor of shape (batch, 1), one KL value per sample.
    """
    if device is None:
        # Default to alpha's device (the original relied on a project-level get_device() helper).
        device = alpha.device
    ones = torch.ones([1, num_classes], dtype=torch.float32, device=device)
    sum_alpha = torch.sum(alpha, dim=1, keepdim=True)
    # Log-ratio of the Dirichlet normalizing constants, log B(1) - log B(alpha):
    # lgamma(S) - sum_k lgamma(alpha_k) + sum_k lgamma(1) - lgamma(K), with S = sum_k alpha_k.
    first_term = (
        torch.lgamma(sum_alpha)
        - torch.lgamma(alpha).sum(dim=1, keepdim=True)
        + torch.lgamma(ones).sum(dim=1, keepdim=True)
        - torch.lgamma(ones.sum(dim=1, keepdim=True))
    )
    # Expectation term: sum_k (alpha_k - 1) * (digamma(alpha_k) - digamma(S)).
    second_term = (
        (alpha - ones)
        .mul(torch.digamma(alpha) - torch.digamma(sum_alpha))
        .sum(dim=1, keepdim=True)
    )
    return first_term + second_term
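
For context, this is the closed-form KL(Dir(alpha) || Dir(1, ..., 1)) that typically serves as the regularizer in evidential deep learning losses. Here is a minimal sanity check, assuming the function above is in scope; the evidence values are made up for illustration:

# Hypothetical batch of two samples over three classes.
evidence = torch.tensor([[1.0, 4.0, 0.5], [2.0, 2.0, 2.0]])
alpha = evidence + 1.0  # Dirichlet parameters: evidence + 1, as in evidential deep learning
print(kl_divergence(alpha, num_classes=3).shape)  # torch.Size([2, 1]): one KL per sample
# The uniform Dirichlet itself should give zero divergence:
print(kl_divergence(torch.ones(1, 3), num_classes=3))  # expected: tensor([[0.]])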
If you have other ideas, please let me know.