Spaces:

CallMeDaniel
/

TaiwanOCR_CertificateofDiagnosis

Sleeping

TaiwanOCR_CertificateofDiagnosis / ppocr /modeling /necks /fce_fpn.py

Danieldu

add code

a89d9fd about 1 year ago

11.2 kB

	# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""
	This code is adapted from:
	https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
	"""

	import paddle.nn as nn
	import paddle.nn.functional as F
	from paddle import ParamAttr
	from paddle.nn.initializer import XavierUniform
	from paddle.nn.initializer import Normal
	from paddle.regularizer import L2Decay

	__all__ = ['FCEFPN']


	class ConvNormLayer(nn.Layer):
	    """
	    Bias-free 2-D convolution followed by a normalization layer.

	    Args:
	        ch_in (int): number of input channels of the convolution.
	        ch_out (int): number of output channels of the convolution; also
	            the channel count handed to the normalization layer.
	        filter_size (int): convolution kernel size; the padding is
	            ``(filter_size - 1) // 2``.
	        stride: stride passed to the convolution.
	        groups (int): ``groups`` argument of the convolution. default 1
	        norm_type (str): one of 'bn', 'sync_bn' or 'gn'. default 'bn'
	        norm_decay (float|None): L2 decay coefficient for the weight and
	            bias of the normalization layer; None disables the
	            regularizer. default 0.
	        norm_groups (int): number of groups, only read when norm_type is
	            'gn'. default 32
	        lr_scale (float): accepted but not read by this layer. default 1.
	        freeze_norm (bool): when True the normalization parameters are
	            given a learning rate of 0. default False
	        initializer: initializer of the convolution weight.
	            default Normal(mean=0., std=0.01)
	    """

	    def __init__(self,
	                 ch_in,
	                 ch_out,
	                 filter_size,
	                 stride,
	                 groups=1,
	                 norm_type='bn',
	                 norm_decay=0.,
	                 norm_groups=32,
	                 lr_scale=1.,
	                 freeze_norm=False,
	                 initializer=Normal(
	                     mean=0., std=0.01)):
	        super(ConvNormLayer, self).__init__()
	        assert norm_type in ['bn', 'sync_bn', 'gn']

	        # The convolution is created without a bias term; the
	        # normalization layer built below has its own bias parameter.
	        conv_weight_attr = ParamAttr(
	            initializer=initializer, learning_rate=1.)
	        self.conv = nn.Conv2D(
	            in_channels=ch_in,
	            out_channels=ch_out,
	            kernel_size=filter_size,
	            stride=stride,
	            padding=(filter_size - 1) // 2,
	            groups=groups,
	            weight_attr=conv_weight_attr,
	            bias_attr=False)

	        # freeze_norm maps to a learning rate of 0 on the norm parameters.
	        if freeze_norm:
	            norm_lr = 0.
	        else:
	            norm_lr = 1.

	        def make_norm_attr():
	            # Weight and bias each get their own ParamAttr / L2Decay.
	            decay = None if norm_decay is None else L2Decay(norm_decay)
	            return ParamAttr(learning_rate=norm_lr, regularizer=decay)

	        scale_attr = make_norm_attr()
	        shift_attr = make_norm_attr()

	        if norm_type in ('bn', 'sync_bn'):
	            if norm_type == 'bn':
	                norm_cls = nn.BatchNorm2D
	            else:
	                norm_cls = nn.SyncBatchNorm
	            self.norm = norm_cls(
	                ch_out, weight_attr=scale_attr, bias_attr=shift_attr)
	        elif norm_type == 'gn':
	            self.norm = nn.GroupNorm(
	                num_groups=norm_groups,
	                num_channels=ch_out,
	                weight_attr=scale_attr,
	                bias_attr=shift_attr)

	    def forward(self, inputs):
	        """Run ``inputs`` through the convolution, then the norm layer."""
	        return self.norm(self.conv(inputs))


	class FCEFPN(nn.Layer):
	    """
	    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

	    Args:
	        in_channels (list[int]): input channels of each level which can be
	            derived from the output shape of backbone by from_config
	        out_channels (int): output channels shared by every level (it is
	            used as a scalar channel count for all convolutions)
	        spatial_scales (list[float]): the spatial scales between input
	            feature maps and original input image which can be derived
	            from the output shape of backbone by from_config. One halved
	            scale is appended per extra stage.
	            default [0.25, 0.125, 0.0625, 0.03125]
	        has_extra_convs (bool): whether to add extra conv to the last
	            level. When False and extra_stage is 1, forward appends a
	            stride-2 max pool of the last output instead. default False
	        extra_stage (int): the number of extra stages added to the last
	            level. default 1
	        use_c5 (bool): Whether to use c5 as the input of extra stage,
	            otherwise p5 is used. default True
	        norm_type (string|None): The normalization type in FPN module. If
	            norm_type is None, norm will not be used after conv and if
	            norm_type is string, bn, gn, sync_bn are available.
	            default None
	        norm_decay (float): weight decay for normalization layer weights.
	            default 0.
	        freeze_norm (bool): whether to freeze normalization layer.
	            default False
	        relu_before_extra_convs (bool): whether to apply relu to the input
	            of every extra conv after the first one. default True

	    """

	    def __init__(self,
	                 in_channels,
	                 out_channels,
	                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
	                 has_extra_convs=False,
	                 extra_stage=1,
	                 use_c5=True,
	                 norm_type=None,
	                 norm_decay=0.,
	                 freeze_norm=False,
	                 relu_before_extra_convs=True):
	        super(FCEFPN, self).__init__()
	        self.out_channels = out_channels
	        # Copy first so that neither the caller's sequence nor the shared
	        # default list is ever aliased by self.spatial_scales.
	        scales = list(spatial_scales)
	        for _ in range(extra_stage):
	            # every extra stage halves the scale of the previous level
	            scales.append(scales[-1] / 2.)
	        self.spatial_scales = scales
	        self.has_extra_convs = has_extra_convs
	        self.extra_stage = extra_stage
	        self.use_c5 = use_c5
	        self.relu_before_extra_convs = relu_before_extra_convs
	        self.norm_type = norm_type
	        self.norm_decay = norm_decay
	        self.freeze_norm = freeze_norm

	        self.lateral_convs = []
	        self.fpn_convs = []
	        # fan_out handed to XavierUniform for every 3x3 convolution
	        fan = out_channels * 3 * 3

	        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
	        # 0 <= st_stage < ed_stage <= 3
	        st_stage = 4 - len(in_channels)
	        ed_stage = st_stage + len(in_channels) - 1

	        # 1x1 lateral convs: project each input level to out_channels
	        for i, in_c in enumerate(in_channels, start=st_stage):
	            if i == 3:
	                lateral_name = 'fpn_inner_res5_sum'
	            else:
	                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
	            self.lateral_convs.append(
	                self._register_conv(
	                    lateral_name, in_c, kernel_size=1, stride=1,
	                    fan_out=in_c))

	        # 3x3 output convs, one per input level
	        for i in range(st_stage, ed_stage + 1):
	            fpn_name = 'fpn_res{}_sum'.format(i + 2)
	            self.fpn_convs.append(
	                self._register_conv(
	                    fpn_name, out_channels, kernel_size=3, stride=1,
	                    fan_out=fan))

	        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
	        if self.has_extra_convs:
	            for i in range(self.extra_stage):
	                lvl = ed_stage + 1 + i
	                # only the first extra conv may read the raw backbone
	                # feature; later ones always consume a previous output
	                if i == 0 and self.use_c5:
	                    in_c = in_channels[-1]
	                else:
	                    in_c = out_channels
	                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
	                self.fpn_convs.append(
	                    self._register_conv(
	                        extra_fpn_name, in_c, kernel_size=3, stride=2,
	                        fan_out=fan))

	    def _register_conv(self, name, ch_in, kernel_size, stride, fan_out):
	        """Build one conv (with norm if norm_type is set), register it as
	        sublayer ``name`` and return the registered layer."""
	        initializer = XavierUniform(fan_out=fan_out)
	        if self.norm_type is not None:
	            layer = ConvNormLayer(
	                ch_in=ch_in,
	                ch_out=self.out_channels,
	                filter_size=kernel_size,
	                stride=stride,
	                norm_type=self.norm_type,
	                norm_decay=self.norm_decay,
	                freeze_norm=self.freeze_norm,
	                initializer=initializer)
	        else:
	            # Same padding rule as ConvNormLayer: 0 for 1x1, 1 for 3x3.
	            layer = nn.Conv2D(
	                in_channels=ch_in,
	                out_channels=self.out_channels,
	                kernel_size=kernel_size,
	                stride=stride,
	                padding=(kernel_size - 1) // 2,
	                weight_attr=ParamAttr(initializer=initializer))
	        return self.add_sublayer(name, layer)

	    @classmethod
	    def from_config(cls, cfg, input_shape):
	        """Derive ``in_channels`` and ``spatial_scales`` from the
	        ``channels`` and ``stride`` attributes of each input_shape item.
	        ``cfg`` is not read."""
	        return {
	            'in_channels': [i.channels for i in input_shape],
	            'spatial_scales': [1.0 / i.stride for i in input_shape],
	        }

	    def forward(self, body_feats):
	        """Return the list of pyramid outputs for ``body_feats``.

	        ``body_feats`` is indexed level by level; the result has one entry
	        per input level followed by the extra-stage outputs, if any.
	        """
	        num_levels = len(body_feats)
	        laterals = [
	            self.lateral_convs[i](body_feats[i]) for i in range(num_levels)
	        ]

	        # Top-down pathway, from the last level towards the first.
	        # NOTE(review): the add presumably needs each level to be exactly
	        # twice the spatial size of the next one - confirm for odd sizes.
	        for lvl in range(num_levels - 1, 0, -1):
	            upsample = F.interpolate(
	                laterals[lvl],
	                scale_factor=2.,
	                mode='nearest', )
	            laterals[lvl - 1] += upsample

	        fpn_output = [
	            self.fpn_convs[lvl](laterals[lvl]) for lvl in range(num_levels)
	        ]

	        if self.extra_stage > 0:
	            if not self.has_extra_convs:
	                # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
	                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
	                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
	            else:
	                # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
	                if self.use_c5:
	                    extra_source = body_feats[-1]
	                else:
	                    extra_source = fpn_output[-1]
	                fpn_output.append(self.fpn_convs[num_levels](extra_source))

	                # remaining extra convs chain on the previous output
	                for i in range(1, self.extra_stage):
	                    extra_conv = self.fpn_convs[num_levels + i]
	                    if self.relu_before_extra_convs:
	                        fpn_output.append(extra_conv(F.relu(fpn_output[-1])))
	                    else:
	                        fpn_output.append(extra_conv(fpn_output[-1]))
	        return fpn_output