
Source code for common.vision.models.reid.loss

"""
Modified from https://github.com/yxgeee/MMT
@author: Baixu Chen
@contact: [email protected]
"""
import torch
import torch.nn as nn
import torch.nn.functional as F


def pairwise_euclidean_distance(x, y):
    """Compute pairwise euclidean distance between two sets of features"""
    m, n = x.size(0), y.size(0)
    dist_mat = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) + \
               torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() \
               - 2 * torch.matmul(x, y.t())
    # for numerical stability
    dist_mat = dist_mat.clamp(min=1e-12).sqrt()
    return dist_mat
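
# A minimal usage sketch (not part of the original module); the tensor shapes below are illustrative assumptions.
# Up to the clamp(min=1e-12) applied for numerical stability, the result should be numerically close to torch.cdist.
x = torch.randn(4, 128)                        # 4 probe features of dimension 128
y = torch.randn(6, 128)                        # 6 gallery features of dimension 128
dist_mat = pairwise_euclidean_distance(x, y)   # shape (4, 6)
reference = torch.cdist(x, y)                  # reference values for comparison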


def hard_examples_mining(dist_mat, identity_mat, return_idxes=False):
    r"""Select hard positives and hard negatives according to `In defense of the Triplet Loss for Person
    Re-Identification (ICCV 2017) <https://arxiv.org/pdf/1703.07737v2.pdf>`_

    Args:
        dist_mat (tensor): pairwise distance matrix between two sets of features
        identity_mat (tensor): a matrix of shape :math:`(N, M)`. If two images :math:`P[i]` of set :math:`P` and
            :math:`Q[j]` of set :math:`Q` come from the same person, then :math:`identity\_mat[i, j] = 1`,
            otherwise :math:`identity\_mat[i, j] = 0`
        return_idxes (bool, optional): if True, also return indexes of hard examples. Default: False
    """
    # The implementation here is a little tricky. dist_mat contains the pairwise distances between each probe image
    # and every other image in the current mini-batch. Since we want to select hard positives (images of the **same**
    # person), we add a large constant negative offset to entries of **different** persons before sorting in
    # descending order. As a result, images of the same person rank first, and column 0 holds, for each probe,
    # its largest distance to a positive, i.e. the hardest positive.
    sorted_dist_mat, sorted_idxes = torch.sort(dist_mat + (-1e7) * (1 - identity_mat), dim=1,
                                               descending=True)
    dist_ap = sorted_dist_mat[:, 0]
    hard_positive_idxes = sorted_idxes[:, 0]

    # The implementation below is symmetric: we add a large constant positive offset to entries of the **same**
    # person and sort in ascending order. As a result, images of **different** persons rank first, and column 0
    # holds, for each probe, its smallest distance to a negative, i.e. the hardest negative.
    sorted_dist_mat, sorted_idxes = torch.sort(dist_mat + 1e7 * identity_mat, dim=1,
                                               descending=False)
    dist_an = sorted_dist_mat[:, 0]
    hard_negative_idxes = sorted_idxes[:, 0]
    if return_idxes:
        return dist_ap, dist_an, hard_positive_idxes, hard_negative_idxes
    return dist_ap, dist_an
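
# A minimal usage sketch (not part of the original module); the toy batch below is an illustrative assumption.
# It shows how identity_mat is built for a single mini-batch and what the mined distances mean.
f = torch.randn(4, 16)                          # 4 features of dimension 16
labels = torch.tensor([0, 0, 1, 1])             # two images per identity
dist_mat = pairwise_euclidean_distance(f, f)
n = dist_mat.size(0)
identity_mat = labels.expand(n, n).eq(labels.expand(n, n).t()).float()
dist_ap, dist_an = hard_examples_mining(dist_mat, identity_mat)
# dist_ap[i]: distance from sample i to its farthest positive (the hardest positive)
# dist_an[i]: distance from sample i to its closest negative (the hardest negative)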


class CrossEntropyLossWithLabelSmooth(nn.Module):
    r"""Cross entropy loss with label smoothing from `Rethinking the Inception Architecture for Computer Vision
    (CVPR 2016) <https://arxiv.org/pdf/1512.00567.pdf>`_. Given one-hot labels :math:`labels \in R^C`, where
    :math:`C` is the number of classes, smoothed labels are calculated as

    .. math::
        smoothed\_labels = (1 - \epsilon) \times labels + \epsilon \times \frac{1}{C}

    We use smoothed labels when calculating cross entropy loss, which can be helpful for preventing over-fitting.

    Args:
        num_classes (int): number of classes.
        epsilon (float, optional): a float number that controls the smoothness. Default: 0.1

    Inputs:
        - y (tensor): unnormalized classifier predictions, :math:`y`
        - labels (tensor): ground truth labels, :math:`labels`

    Shape:
        - y: :math:`(minibatch, C)`, where :math:`C` is the number of classes
        - labels: :math:`(minibatch, )`
    """

    def __init__(self, num_classes, epsilon=0.1):
        super(CrossEntropyLossWithLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.log_softmax = nn.LogSoftmax(dim=1).cuda()

    def forward(self, y, labels):
        log_prob = self.log_softmax(y)
        # convert integer labels to one-hot, then smooth them
        labels = torch.zeros_like(log_prob).scatter_(1, labels.unsqueeze(1), 1)
        labels = (1 - self.epsilon) * labels + self.epsilon / self.num_classes
        loss = (- labels * log_prob).mean(0).sum()
        return loss
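
# A minimal usage sketch (not part of the original module); the batch size and the 751 classes (e.g. the number of
# Market-1501 training identities) are illustrative assumptions. Since nn.LogSoftmax holds no parameters, the
# .cuda() call in __init__ should be a no-op, so the snippet should also run on CPU.
criterion = CrossEntropyLossWithLabelSmooth(num_classes=751, epsilon=0.1)
y = torch.randn(32, 751)                  # unnormalized classifier predictions
labels = torch.randint(0, 751, (32,))     # ground-truth identity labels
loss = criterion(y, labels)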


class TripletLoss(nn.Module):
    """Triplet loss augmented with batch hard from `In defense of the Triplet Loss for Person Re-Identification
    (ICCV 2017) <https://arxiv.org/pdf/1703.07737v2.pdf>`_.

    Args:
        margin (float): margin of triplet loss
        normalize_feature (bool, optional): if True, normalize features into unit norm first before computing loss.
            Default: False.
    """

    def __init__(self, margin, normalize_feature=False):
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.normalize_feature = normalize_feature
        self.margin_loss = nn.MarginRankingLoss(margin=margin).cuda()

    def forward(self, f, labels):
        if self.normalize_feature:
            # equivalent to cosine similarity
            f = F.normalize(f)
        dist_mat = pairwise_euclidean_distance(f, f)
        n = dist_mat.size(0)
        identity_mat = labels.expand(n, n).eq(labels.expand(n, n).t()).float()
        dist_ap, dist_an = hard_examples_mining(dist_mat, identity_mat)
        y = torch.ones_like(dist_ap)
        loss = self.margin_loss(dist_an, dist_ap, y)
        return loss
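
# A minimal usage sketch (not part of the original module); the P x K sampling below (4 identities, 4 images each)
# and the 2048-dim features are illustrative assumptions.
triplet = TripletLoss(margin=0.3, normalize_feature=False)
f = torch.randn(16, 2048)                          # backbone features
labels = torch.arange(4).repeat_interleave(4)      # [0, 0, 0, 0, 1, 1, 1, 1, ...]
loss = triplet(f, labels)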


class TripletLossXBM(nn.Module):
    r"""Triplet loss augmented with batch hard from `In defense of the Triplet Loss for Person Re-Identification
    (ICCV 2017) <https://arxiv.org/pdf/1703.07737v2.pdf>`_. The only difference from triplet loss lies in that
    both features from the current mini batch and the external storage (XBM) are involved.

    Args:
        margin (float, optional): margin of triplet loss. Default: 0.3
        normalize_feature (bool, optional): if True, normalize features into unit norm first before computing loss.
            Default: False

    Inputs:
        - f (tensor): features of current mini batch, :math:`f`
        - labels (tensor): identity labels for current mini batch, :math:`labels`
        - xbm_f (tensor): features collected from XBM, :math:`xbm\_f`
        - xbm_labels (tensor): corresponding identity labels of xbm_f, :math:`xbm\_labels`

    Shape:
        - f: :math:`(minibatch, F)`, where :math:`F` is the feature dimension
        - labels: :math:`(minibatch, )`
        - xbm_f: :math:`(minibatch, F)`
        - xbm_labels: :math:`(minibatch, )`
    """

    def __init__(self, margin=0.3, normalize_feature=False):
        super(TripletLossXBM, self).__init__()
        self.margin = margin
        self.normalize_feature = normalize_feature
        self.ranking_loss = nn.MarginRankingLoss(margin=margin)

    def forward(self, f, labels, xbm_f, xbm_labels):
        if self.normalize_feature:
            # equivalent to cosine similarity
            f = F.normalize(f)
            xbm_f = F.normalize(xbm_f)
        dist_mat = pairwise_euclidean_distance(f, xbm_f)
        # hard examples mining
        n, m = f.size(0), xbm_f.size(0)
        identity_mat = labels.expand(m, n).t().eq(xbm_labels.expand(n, m)).float()
        dist_ap, dist_an = hard_examples_mining(dist_mat, identity_mat)
        # compute ranking hinge loss
        y = torch.ones_like(dist_an)
        loss = self.ranking_loss(dist_an, dist_ap, y)
        return loss
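
# A minimal usage sketch (not part of the original module); the memory-bank features below are random stand-ins for
# features collected by XBM, and the labels are repeated so that every sample has positives in the memory.
xbm_criterion = TripletLossXBM(margin=0.3)
f = torch.randn(16, 2048)
labels = torch.arange(4).repeat_interleave(4)
xbm_f = torch.randn(64, 2048)             # features drawn from the external memory
xbm_labels = labels.repeat(4)             # corresponding identity labels
loss = xbm_criterion(f, labels, xbm_f, xbm_labels)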


class SoftTripletLoss(nn.Module):
    r"""Soft triplet loss from `Mutual Mean-Teaching: Pseudo Label Refinery for Unsupervised Domain Adaptation
    on Person Re-identification (ICLR 2020) <https://arxiv.org/pdf/2001.01526.pdf>`_. Consider a triplet
    :math:`x, x_p, x_n` (anchor, positive, negative), whose corresponding features are :math:`f, f_p, f_n`.
    We optimize for a smaller distance between :math:`f` and :math:`f_p` and a larger distance between :math:`f`
    and :math:`f_n`. With inner product adopted as the similarity measure, the soft triplet loss is defined as

    .. math::
        loss = \mathcal{L}_{\text{bce}}(\frac{\text{exp}(f^Tf_p)}{\text{exp}(f^Tf_p)+\text{exp}(f^Tf_n)}, 1)

    where :math:`\mathcal{L}_{\text{bce}}` denotes binary cross entropy loss. We denote the first argument in the
    above loss function as :math:`T`. When features from another (teacher) network can be obtained, we can use
    :math:`T_{teacher}` as labels, resulting in the following soft version

    .. math::
        loss = \mathcal{L}_{\text{bce}}(T, T_{teacher})

    Args:
        margin (float, optional): margin of triplet loss. If None, soft labels from another network will be adopted
            when computing loss. Default: None.
        normalize_feature (bool, optional): if True, normalize features into unit norm first before computing loss.
            Default: False.
    """

    def __init__(self, margin=None, normalize_feature=False):
        super(SoftTripletLoss, self).__init__()
        self.margin = margin
        self.normalize_feature = normalize_feature

    def forward(self, features_1, features_2, labels):
        if self.normalize_feature:
            # equivalent to cosine similarity
            features_1 = F.normalize(features_1)
            features_2 = F.normalize(features_2)

        dist_mat = pairwise_euclidean_distance(features_1, features_1)
        assert dist_mat.size(0) == dist_mat.size(1)
        n = dist_mat.size(0)
        identity_mat = labels.expand(n, n).eq(labels.expand(n, n).t()).float()
        dist_ap, dist_an, ap_idxes, an_idxes = hard_examples_mining(dist_mat, identity_mat, return_idxes=True)
        assert dist_an.size(0) == dist_ap.size(0)

        triple_dist = torch.stack((dist_ap, dist_an), dim=1)
        triple_dist = F.log_softmax(triple_dist, dim=1)
        if self.margin is not None:
            # hard-label variant: fixed weights on the positive/negative terms
            loss = (- self.margin * triple_dist[:, 0] - (1 - self.margin) * triple_dist[:, 1]).mean()
            return loss

        # soft variant: distances from the second feature set at the same mined indexes provide soft targets
        dist_mat_ref = pairwise_euclidean_distance(features_2, features_2)
        dist_ap_ref = torch.gather(dist_mat_ref, 1, ap_idxes.view(n, 1).expand(n, n))[:, 0]
        dist_an_ref = torch.gather(dist_mat_ref, 1, an_idxes.view(n, 1).expand(n, n))[:, 0]
        triple_dist_ref = torch.stack((dist_ap_ref, dist_an_ref), dim=1)
        triple_dist_ref = F.softmax(triple_dist_ref, dim=1).detach()
        loss = (- triple_dist_ref * triple_dist).sum(dim=1).mean()
        return loss
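
# A minimal usage sketch (not part of the original module); feature names and shapes are illustrative assumptions,
# with f_teacher standing in for a teacher (e.g. mean-teacher) network's features.
labels = torch.arange(4).repeat_interleave(4)
f_student = torch.randn(16, 2048)
f_teacher = torch.randn(16, 2048)
# hard-label variant: a fixed margin weights the positive/negative terms, teacher features are not used
hard_criterion = SoftTripletLoss(margin=0.)
loss_hard = hard_criterion(f_student, f_teacher, labels)
# soft variant: margin=None, distances from the teacher features provide soft targets
soft_criterion = SoftTripletLoss(margin=None)
loss_soft = soft_criterion(f_student, f_teacher, labels)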


class CrossEntropyLoss(nn.Module):
    r"""Cross entropy loss with soft targets. We use :math:`C` to denote the number of classes and :math:`N` to
    denote the mini-batch size. This criterion expects unnormalized predictions :math:`y\_logits` of shape
    :math:`(N, C)` and :math:`target\_logits` of the same shape :math:`(N, C)`. We first normalize both into
    probability distributions over classes

    .. math::
        y = \text{softmax}(y\_logits)

    .. math::
        target = \text{softmax}(target\_logits)

    The final objective is calculated as

    .. math::
        \text{loss} = \frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^C -target_i^j \times \text{log} (y_i^j)

    """

    def __init__(self):
        super(CrossEntropyLoss, self).__init__()
        self.log_softmax = nn.LogSoftmax(dim=1).cuda()

    def forward(self, y, labels):
        log_prob = self.log_softmax(y)
        loss = (- F.softmax(labels, dim=1).detach() * log_prob).sum(dim=1).mean()
        return loss
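
# A minimal usage sketch (not part of the original module); the logits below are random stand-ins, with the target
# logits playing the role of teacher predictions.
soft_ce = CrossEntropyLoss()
y_logits = torch.randn(32, 751)           # unnormalized student predictions
target_logits = torch.randn(32, 751)      # unnormalized target (e.g. teacher) predictions
loss = soft_ce(y_logits, target_logits)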
