Source code for pahelix.networks.involution_block

#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Involution Block
"""

import paddle
from paddle import nn
import paddle.nn.functional as F
import numpy as np
import math


class Involution2D(nn.Layer):
    """
    Involution module.

    A per-position kernel is generated from the input itself
    (``o_mapping`` -> ``reduce_mapping`` -> ``sigma_mapping`` -> ``span_mapping``)
    and applied to the unfolded neighbourhoods of ``initial_mapping(x)``.
    The generated kernel is shared by all channels inside one group.

    Args:
        in_channel: The channel size of input.
        out_channel: The channel size of output. Must be divisible by ``groups``.
        sigma_mapping: Optional layer applied between the reduce and span
            mappings. When ``None``, ``BatchNorm2D`` followed by ``ReLU`` is used.
        kernel_size: Kernel size (a single int, used for both spatial axes).
        stride: Stride size. Used both for the unfold and for the average
            pooling that precedes kernel generation.
        groups: Group size.
        reduce_ratio: The ratio of reduce. The intermediate channel count is
            ``out_channel // reduce_ratio``.
        dilation: The dilation size.
        padding: The padding size.

    Returns:
        output: The output of Involution2D block, reshaped to
            ``[batch_size, out_channel, out_height, out_width]``.

    Raises:
        ValueError: If ``out_channel`` is not divisible by ``groups``.

    References:
        [1] Involution: Inverting the Inherence of Convolution for Visual Recognition.
            https://arxiv.org/abs/2103.06255
    """

    def __init__(self, in_channel, out_channel, sigma_mapping=None, kernel_size=7, stride=1,
                 groups=1, reduce_ratio=1, dilation=1, padding=3):
        """
        Initialization
        """
        super(Involution2D, self).__init__()

        # forward() splits the channels into `groups` equal parts; an uneven
        # split can never be reshaped, so reject it up front.
        if out_channel % groups != 0:
            raise ValueError(
                "out_channel (%s) must be divisible by groups (%s)" % (out_channel, groups))

        self.in_channel = in_channel
        self.out_channel = out_channel
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.reduce_ratio = reduce_ratio
        self.dilation = dilation
        self.padding = padding

        reduced_channel = self.out_channel // self.reduce_ratio

        # Honour a user supplied sigma mapping; fall back to BN + ReLU otherwise.
        if sigma_mapping is None:
            sigma_mapping = nn.Sequential(
                nn.BatchNorm2D(num_features=reduced_channel),
                nn.ReLU()
            )
        self.sigma_mapping = sigma_mapping

        # 1x1 projection of the input to the output channel count.
        self.initial_mapping = nn.Conv2D(in_channels=self.in_channel,
                                         out_channels=self.out_channel,
                                         kernel_size=1, stride=1, padding=0)
        # Pooling with the same stride as the unfold below.
        self.o_mapping = nn.AvgPool2D(kernel_size=self.stride, stride=self.stride)
        self.reduce_mapping = nn.Conv2D(in_channels=self.in_channel,
                                        out_channels=reduced_channel,
                                        kernel_size=1, stride=1, padding=0)
        # Emits kernel_size * kernel_size weights per group for every position.
        self.span_mapping = nn.Conv2D(in_channels=reduced_channel,
                                      out_channels=self.kernel_size * self.kernel_size * self.groups,
                                      kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """
        Involution block

        Args:
            x: Input tensor with four axes; the second axis must have
                ``in_channel`` entries.

        Returns:
            Tensor reshaped to ``[batch_size, out_channel, out_height, out_width]``.
        """
        batch_size = x.shape[0]
        kernel_area = self.kernel_size * self.kernel_size

        # Extract the sliding neighbourhoods of the projected input:
        # [batch, out_channel * kernel_area, num_positions].
        input_unfolded = F.unfold(self.initial_mapping(x),
                                  self.kernel_size,
                                  strides=self.stride,
                                  paddings=self.padding,
                                  dilations=self.dilation)

        # Generate the position specific kernels.
        kernel = self.span_mapping(self.sigma_mapping(self.reduce_mapping(self.o_mapping(x))))

        # Use the spatial size of the generated kernel rather than the input
        # size, so the reshapes stay valid when stride > 1.
        out_height, out_width = kernel.shape[2], kernel.shape[3]

        input_unfolded = paddle.reshape(
            input_unfolded,
            [batch_size, self.groups, self.out_channel // self.groups,
             kernel_area, out_height, out_width])

        # The singleton axis 2 broadcasts one kernel over all channels of a group.
        kernel = paddle.reshape(
            kernel,
            [batch_size, self.groups, 1, kernel_area, out_height, out_width])

        # Weighted sum over the kernel window (axis 3), then merge the groups.
        output = paddle.sum(kernel * input_unfolded, axis=3)
        return paddle.reshape(output, [batch_size, -1, out_height, out_width])