2 files changed: +4 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ class InitializerConfig(BaseConfig):
116 116       init_decoder_norm: float | None = None
117 117       init_encoder_norm: float | None = None
118 118       init_encoder_with_decoder_transpose: bool = True
119     -     init_search: bool = False
    119 +     init_search: bool = True
120 120       state: Literal["training", "inference"] = "training"
121 121       l1_coefficient: float | None = 0.00008
122 122
@@ -140,8 +140,8 @@ class TrainerConfig(BaseConfig):
140 140           "exponentialwarmup",
141 141       ] = "constantwithwarmup"
142 142       lr_end_ratio: float = 1 / 32
143     -     lr_warm_up_steps: int | float = 0.1
144     -     lr_cool_down_steps: int | float = 0.1
    143 +     lr_warm_up_steps: int | float = 5000
    144 +     lr_cool_down_steps: int | float = 0.2
145 145       clip_grad_norm: float = 0.0
146 146       feature_sampling_window: int = 1000
147 147       total_training_tokens: int = 300_000_000
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ def mixcoder_config() -> MixCoderConfig:
 35  35   def initializer_config() -> InitializerConfig:
 36  36       return InitializerConfig(
 37  37           state="training",
     38 +         init_search=False,
 38  39       )
 39  40
 40  41
You can’t perform that action at this time.
0 commit comments