# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn.functional as F

from ..builder import HEADS
from .cls_head import ClsHead
from .stiefel import StiefelLinear


@HEADS.register_module()
class StiefelLinearClsHead(ClsHead):
    """Classifier head built on a ``StiefelLinear`` projection.

    The (optionally normalized) input feature is projected by a
    ``StiefelLinear`` layer to ``num_classes * n_components`` values, which
    are reshaped to ``(N, num_classes, n_components)``. The score of a class
    is the sum over its components of the squared projections (optionally
    keeping only the positive projections), scaled by ``kappa``.

    Args:
        num_classes (int): Number of categories excluding the background
            category. Must be positive.
        in_channels (int): Number of channels in the input feature map.
        n_components (int): Number of projection components per class.
            Defaults to 10.
        kappa (float): Factor the class scores are multiplied by before
            they are returned. Defaults to 10.
        relu (bool): If True, a component contributes
            ``relu(a) * a`` (i.e. ``a ** 2`` only where ``a > 0``) to the
            class score; otherwise it contributes ``a ** 2``.
            Defaults to True.
        normalize (bool | str): How the input feature is normalized along
            dim 1 before the projection. ``'hyperbolic'`` rescales each
            feature vector to norm ``tanh(||x|| / 2)``; any other truthy
            value L2-normalizes it; a falsy value leaves it unchanged.
            Defaults to True.
        init_cfg (dict | optional): The extra init config of layers.
            Defaults to use dict(type='Normal', layer='Linear', std=0.01).
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 n_components=10,
                 kappa=10.,
                 relu=True,
                 normalize=True,
                 init_cfg=dict(type='Normal', layer='Linear', std=0.01),
                 *args,
                 **kwargs):
        super(StiefelLinearClsHead, self).__init__(
            init_cfg=init_cfg, *args, **kwargs)

        self.in_channels = in_channels
        self.num_classes = num_classes
        self.n_components = n_components
        self.kappa = kappa
        self.relu = relu
        self.normalize = normalize

        if self.num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        # One group of ``n_components`` output units per class.
        self.fc = StiefelLinear(self.in_channels,
                                self.num_classes * n_components,
                                n_components)

    def pre_logits(self, x):
        """Select the feature to classify.

        Args:
            x (Tensor | tuple[Tensor]): The input features. If a tuple is
                given, only its last item is used.

        Returns:
            Tensor: The selected feature, unchanged.
        """
        if isinstance(x, tuple):
            x = x[-1]
        return x

    def _forward(self, x):
        """Compute the scaled class scores.

        Args:
            x (Tensor | tuple[Tensor]): The input features; see
                :meth:`pre_logits`.

        Returns:
            Tensor: Class scores with ``num_classes`` entries along dim 1,
            already multiplied by ``kappa``.
        """
        x = self.pre_logits(x)

        # Exactly one normalization is applied. Previously an extra,
        # unconditional L2 normalization followed this chain and silently
        # cancelled the hyperbolic rescaling.
        if self.normalize == 'hyperbolic':
            # Clamp the norm so an all-zero feature maps to zero, not NaN.
            norm = torch.norm(x, dim=1, keepdim=True).clamp_min(1e-12)
            x = torch.tanh(norm * 0.5) * x / norm
        elif self.normalize:
            # Over a single dim the 2-norm equals the Frobenius norm.
            x = F.normalize(x, dim=1, p=2)

        align = self.fc(x).view(-1, self.num_classes, self.n_components)
        if self.relu:
            # relu(a) * a == a ** 2 for a > 0 and 0 otherwise.
            cls_score = (F.relu(align, inplace=False) * align).sum(dim=2)
        else:
            cls_score = align.pow(2).sum(dim=2)
        return cls_score * self.kappa

    def simple_test(self, x, softmax=True, post_process=True):
        """Inference without augmentation.

        Args:
            x (tuple[Tensor]): The input features.
                Multi-stage inputs are acceptable but only the last stage will
                be used to classify. The shape of every item should be
                ``(num_samples, in_channels)``.
            softmax (bool): Whether to softmax the classification score.
            post_process (bool): Whether to do post processing the
                inference results. It will convert the output to a list.

        Returns:
            Tensor | list: The inference results.

                - If no post processing, the output is a tensor with shape
                  ``(num_samples, num_classes)``.
                - If post processing, the output is a multi-dimensional list
                  of float and the dimensions are
                  ``(num_samples, num_classes)``.
        """
        cls_score = self._forward(x)

        if softmax:
            pred = (
                F.softmax(cls_score, dim=1) if cls_score is not None else None)
        else:
            pred = cls_score

        if post_process:
            return self.post_process(pred)
        else:
            return pred

    def forward_train(self, x, gt_label, **kwargs):
        """Compute the training losses for a batch.

        Args:
            x (Tensor | tuple[Tensor]): The input features; see
                :meth:`pre_logits`.
            gt_label (Tensor): Ground-truth labels, forwarded to
                ``self.loss``.
            **kwargs: Extra keyword arguments forwarded to ``self.loss``.

        Returns:
            The value returned by ``self.loss`` (inherited from the parent
            head).
        """
        cls_score = self._forward(x)
        losses = self.loss(cls_score, gt_label, **kwargs)
        return losses
