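# Training config for an SVTRv2 encoder + CTC decoder text recognition model
# (OpenOCR-style config). Initializes from the openocr_svtrv2_ch.pth checkpoint,
# trains on the five filtered Union14M-L LMDB subsets listed under Train.dataset
# with AMP enabled, and writes outputs under ./output/rec/u14m_filter/.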
Global:
  device: gpu
  epoch_num: 20
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/u14m_filter/svtrv2_ctc_u14m_two33_tvresize/
  save_epoch_step: 1
  # evaluation is run every 500 iterations (eval_batch_step: [start_iter, interval])
  eval_epoch_step: [0, 1]
  eval_batch_step: [0, 500]
  cal_metric_during_train: True
  pretrained_model: ./openocr_svtrv2_ch.pth
  checkpoints:
  use_tensorboard: false
  infer_img:
  # for data or label process
  character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt
  max_text_length: &max_text_length 25
  use_space_char: &use_space_char True
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_ctc.txt
  use_amp: True
  project_name: svtrv2_ctc_nosgm_ds
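
# The inline note below indicates lr 0.00065 was chosen for 4 GPUs x 256 samples/GPU
# (global batch of 1024); if the global batch changes, the learning rate likely needs
# rescaling as well (assumption, not stated in this file).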
Optimizer:
  name: AdamW
  lr: 0.00065 # for 4gpus bs256/gpu
  weight_decay: 0.05
  filter_bias_and_bn: True
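
# OneCycleLR ramps the learning rate up during warmup and anneals it afterwards;
# warmup_epoch 1.5 of 20 epochs corresponds to pct_start = 0.075 (per the inline note).
# cycle_momentum is disabled, as is common when pairing OneCycleLR with Adam-family
# optimizers such as AdamW.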
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
  cycle_momentum: False
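
# Encoder: a three-stage SVTRv2 backbone; dims/depths/num_heads give per-stage widths,
# block counts and attention heads, and the mixer lists pick the token mixer for each
# block (local Conv mixing early, Global self-attention later). sub_k appears to set
# per-stage downsampling strides, and feat2d keeps a 2D feature map for the decoder
# (interpretation based on the key names, not documented in this file).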
Architecture:
  model_type: rec
  algorithm: SVTRv2_server
  Transform:
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: False
    out_channels: 256
    dims: [128, 256, 384]
    depths: [6, 6, 6]
    num_heads: [4, 8, 12]
    mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
    kernel_sizes: [[5,5,5,5,5,5], [5,5,5,5,5,5], [-1]]
    num_convs: [[1,1,1,1,1,1], [1,1,1,1,1,1], [-1]]
    sub_k: [[2, 1], [2, 1], [-1, -1]]
    last_stage: False
    feat2d: True
    pope_bias: True
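  # Decoder: CTC head preceded by a small SVTR-style neck (svtr_encoder: 2 blocks,
  # 256-dim) that refines encoder features before per-timestep classification
  # (hedged reading of the sub-keys; see the CTCDecoder implementation for details).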
  Decoder:
    name: CTCDecoder
    svtr_encoder:
      dims: 256
      depth: 2
      hidden_dims: 256
      kernel_size: [1, 3]
      use_guide: True
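
# zero_infinity zeroes out infinite CTC losses (e.g. when a label is longer than the
# input sequence), mirroring torch.nn.CTCLoss(zero_infinity=True) semantics, assuming
# this CTCLoss wraps the PyTorch implementation.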
Loss:
  name: CTCLoss
  zero_infinity: True

PostProcess:
  name: CTCLabelDecode
  character_dict_path: *character_dict_path

Metric:
  name: RecMetric
  main_indicator: acc
  ignore_space: False
  # is_filter: True
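
# Train: reads the five filtered Union14M-L LMDB subsets; RatioDataSetTVResize groups
# images by aspect ratio (ds_width: True) and resizes them to one of the base_shape
# candidates instead of padding to a fixed size (hedged description based on the
# option names).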
Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: True
    padding: False
    base_shape: [[32, 32], [64, 32], [96, 32], [128, 32]]
    data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
                    '../Union14M-L-LMDB-Filtered/filter_train_hard',
                    '../Union14M-L-LMDB-Filtered/filter_train_medium',
                    '../Union14M-L-LMDB-Filtered/filter_train_normal',
                    '../Union14M-L-LMDB-Filtered/filter_train_easy',
                    ]
    transforms:
      - DecodeImagePIL: # load image
          img_mode: RGB
      - PARSeqAugPIL:
      - CTCLabelEncode: # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - KeepKeys:
          keep_keys: ['image', 'label', 'length']
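  # sampler: RatioSampler batches images of similar aspect ratio; first_bs defines the
  # &bs anchor reused by loader.batch_size_per_card, and divided_factor keeps width and
  # height divisible by the network's downsampling multiples (hedged reading of the
  # option names).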
  sampler:
    name: RatioSampler
    scales: [[128, 32]] # w, h
    # divided_factor: to ensure the width and height dimensions can be divided by the downsampling multiple
    first_bs: &bs 256
    fix_bs: false
    divided_factor: [4, 16] # w, h
    is_training: True
  loader:
    shuffle: True
    batch_size_per_card: *bs
    drop_last: True
    max_ratio: 4
    num_workers: 4
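
# Eval: runs on LMDB benchmark sets under ../evaluation; RecDynamicResize rescales each
# image towards image_shape [48, 320] without padding (hedged description based on the
# transform name), and labels are encoded with the same CTC scheme as in training.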
Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ../evaluation
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
      - CTCLabelEncode: # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - RecDynamicResize:
          image_shape: [48, 320]
          padding: False
      - KeepKeys:
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 4